From 41fad86d18f814c9ede0ab8221a637fd45ddb195 Mon Sep 17 00:00:00 2001 From: Shiwen Cheng Date: Wed, 31 Mar 2021 00:13:21 +0800 Subject: [PATCH] [1.3.6-prepare][Improvement][Docker/K8s] Support more configs, more service access, skywalking configs, improve image for python, update faq and add support matrix (#5158) * [1.3.6-prepare][Improvement][Config] Update config * [1.3.6-prepare][Improvement][Docker&K8s] Sync the latest config * [1.3.6-prepare][Improvement][Docker] Fix mysql check and remove redundant config * [1.3.6-prepare][Improvement][Config] Update config for common.properties * [1.3.6-prepare][Improvement][Config] Add config.env for docker compose and swarm * [1.3.6-prepare][Improvement][K8s] Add common properties and reduce duplication for K8s * [1.3.6-prepare][Improvement][K8s] Support more service access like ClusterIP, NodePort and LoadBalancer * [1.3.6-prepare][Improvement][K8s] Unify annotations, affinity, nodeSelector, tolerations, resources and probe in K8s * [1.3.6-prepare][Improvement][Docker&K8s] Support skywalking config in docker & k8s * [1.3.6-prepare][Improvement][Docker] Rename config.env to config.env.sh * [1.3.6-prepare][Improvement][Docker] Replace alpine with debian:slim * [1.3.6-prepare][Improvement][Docker] Remove postgresql-client and mysql-client * [1.3.6-prepare][Improvement][Docker&K8s] Add faq for python, spark, hadoop, flink and datax * [1.3.6-prepare][Improvement][Docker&K8s] Add support matrix for docker/k8s --- .../1.3.5/configuration/dolphin-common.xml | 80 +++-- .../1.3.5/configuration/dolphin-master.xml | 42 ++- .../1.3.5/configuration/dolphin-worker.xml | 29 +- docker/build/Dockerfile | 33 +- docker/build/README.md | 339 +++++++++++++++--- docker/build/README_zh_CN.md | 335 ++++++++++++++--- .../dolphinscheduler/common.properties.tpl | 42 ++- .../dolphinscheduler/master.properties.tpl | 17 +- .../dolphinscheduler/worker.properties.tpl | 12 +- docker/build/startup-init-conf.sh | 63 +++- docker/build/startup.sh | 51 +-- docker/docker-swarm/config.env.sh | 136 +++++++ docker/docker-swarm/docker-compose.yml | 108 +----- docker/docker-swarm/docker-stack.yml | 116 +----- docker/kubernetes/dolphinscheduler/README.md | 245 +++++++++++-- .../dolphinscheduler/templates/NOTES.txt | 28 +- .../dolphinscheduler/templates/_helpers.tpl | 176 +++++++++ .../configmap-dolphinscheduler-alert.yaml | 21 +- .../configmap-dolphinscheduler-api.yaml | 7 +- .../configmap-dolphinscheduler-common.yaml | 14 +- .../configmap-dolphinscheduler-master.yaml | 15 +- .../configmap-dolphinscheduler-worker.yaml | 14 +- .../deployment-dolphinscheduler-alert.yaml | 174 +-------- .../deployment-dolphinscheduler-api.yaml | 170 ++------- .../dolphinscheduler/templates/ingress.yaml | 3 +- .../templates/pvc-dolphinscheduler-alert.yaml | 3 +- .../templates/pvc-dolphinscheduler-api.yaml | 3 +- .../pvc-dolphinscheduler-fs-file.yaml | 3 +- .../pvc-dolphinscheduler-shared.yaml | 3 +- .../templates/secret-external-database.yaml | 3 +- .../templates/secret-external-fs-s3a.yaml | 3 +- .../statefulset-dolphinscheduler-master.yaml | 187 ++-------- .../statefulset-dolphinscheduler-worker.yaml | 275 ++------------ .../templates/svc-dolphinscheduler-api.yaml | 31 +- .../svc-dolphinscheduler-master-headless.yaml | 10 +- .../svc-dolphinscheduler-worker-headless.yaml | 10 +- .../kubernetes/dolphinscheduler/values.yaml | 75 ++-- .../dolphinscheduler/common/Constants.java | 7 - .../common/utils/HadoopUtils.java | 36 +- .../src/main/resources/common.properties | 28 +- 
.../main/assembly/dolphinscheduler-binary.xml | 2 +- .../server/master/config/MasterConfig.java | 18 +- .../server/worker/config/WorkerConfig.java | 20 +- .../src/main/resources/master.properties | 15 +- .../src/main/resources/worker.properties | 16 +- script/dolphinscheduler-daemon.sh | 41 +-- script/scp-hosts.sh | 2 +- tools/dependencies/check-LICENSE.sh | 6 +- 48 files changed, 1650 insertions(+), 1417 deletions(-) create mode 100755 docker/docker-swarm/config.env.sh diff --git a/ambari_plugin/common-services/DOLPHIN/1.3.5/configuration/dolphin-common.xml b/ambari_plugin/common-services/DOLPHIN/1.3.5/configuration/dolphin-common.xml index 439e21188a..f0dafab9d1 100644 --- a/ambari_plugin/common-services/DOLPHIN/1.3.5/configuration/dolphin-common.xml +++ b/ambari_plugin/common-services/DOLPHIN/1.3.5/configuration/dolphin-common.xml @@ -15,11 +15,19 @@ ~ limitations under the License. --> + + data.basedir.path + /tmp/dolphinscheduler + + user data local directory path, please make sure the directory exists and have read write permissions + + + resource.storage.type Choose Resource Upload Startup Type - Resource upload startup type : HDFS,S3,NONE + resource storage type: HDFS, S3, NONE NONE @@ -46,19 +54,10 @@ resource.upload.path /dolphinscheduler - resource store on HDFS/S3 path, resource file will store to this hadoop hdfs path, self configuration, please make sure the directory exists on hdfs and have read write permissions。"/dolphinscheduler" is recommended - - - - - data.basedir.path - /tmp/dolphinscheduler - - user data local directory path, please make sure the directory exists and have read write permissions + resource store on HDFS/S3 path, resource file will store to this hadoop hdfs path, self configuration, please make sure the directory exists on hdfs and have read write permissions. 
"/dolphinscheduler" is recommended - hadoop.security.authentication.startup.state false @@ -76,7 +75,9 @@ 1 - whether kerberos starts + + whether to startup kerberos + java.security.krb5.conf.path @@ -90,7 +91,7 @@ login.user.keytab.username hdfs-mycluster@ESZ.COM - LoginUserFromKeytab user + login user from keytab username @@ -98,20 +99,29 @@ login.user.keytab.path /opt/hdfs.headless.keytab - LoginUserFromKeytab path + login user from keytab path + + kerberos.expire.time + 2 + + kerberos expire time, the unit is hour + + resource.view.suffixs - txt,log,sh,conf,cfg,py,java,sql,hql,xml,properties - + txt,log,sh,bat,conf,cfg,py,java,sql,xml,hql,properties,json,yml,yaml,ini,js + + resource view suffixs + hdfs.root.user hdfs - Users who have permission to create directories under the HDFS root path + if resource.storage.type=HDFS, the user must have the permission to create directories under the HDFS root path @@ -119,9 +129,7 @@ fs.defaultFS hdfs://mycluster:8020 - HA or single namenode, - If namenode ha needs to copy core-site.xml and hdfs-site.xml to the conf directory, - support s3,for example : s3a://dolphinscheduler + if resource.storage.type=S3, the value like: s3a://dolphinscheduler; if resource.storage.type=HDFS and namenode HA is enabled, you need to copy core-site.xml and hdfs-site.xml to conf dir @@ -129,7 +137,7 @@ fs.s3a.endpoint http://host:9010 - s3 need,s3 endpoint + s3 required,s3 endpoint @@ -137,7 +145,7 @@ fs.s3a.access.key A3DXS30FO22544RE - s3 need,s3 access key + s3 required,s3 access key @@ -145,14 +153,32 @@ fs.s3a.secret.key OloCLq3n+8+sdPHUhJ21XrSxTC+JK - s3 need,s3 secret key + s3 required,s3 secret key - kerberos.expire.time - 7 - + yarn.resourcemanager.ha.rm.ids + 192.168.xx.xx,192.168.xx.xx + + if resourcemanager HA is enabled, please set the HA IPs; if resourcemanager is single, keep this value empty + + + + + yarn.application.status.address + http://ds1:8088/ws/v1/cluster/apps/%s + + if resourcemanager HA is enabled or not use resourcemanager, please keep the default value; If resourcemanager is single, you only need to replace ds1 to actual resourcemanager hostname + + + + + dolphinscheduler.env.path + env/dolphinscheduler_env.sh + + system env path + + - \ No newline at end of file diff --git a/ambari_plugin/common-services/DOLPHIN/1.3.5/configuration/dolphin-master.xml b/ambari_plugin/common-services/DOLPHIN/1.3.5/configuration/dolphin-master.xml index c8eec047fc..472c5e3261 100644 --- a/ambari_plugin/common-services/DOLPHIN/1.3.5/configuration/dolphin-master.xml +++ b/ambari_plugin/common-services/DOLPHIN/1.3.5/configuration/dolphin-master.xml @@ -15,13 +15,22 @@ ~ limitations under the License. --> + + master.listen.port + 5678 + + int + + master listen port + + master.exec.threads 100 int - master execute thread num + master execute thread number @@ -33,6 +42,21 @@ master execute task number in parallel + + master.dispatch.task.num + 3 + + int + + master dispatch task number + + + + master.host.selector + LowerWeight + master host selector to select a suitable worker, default value: LowerWeight. Optional values include Random, RoundRobin, LowerWeight + + master.heartbeat.interval 10 @@ -62,27 +86,17 @@ master.max.cpuload.avg - 100 + -1 int - only less than cpu avg load, master server can work. default value : the number of cpu cores * 2 + only less than cpu avg load, master server can work. default value -1: the number of cpu cores * 2 master.reserved.memory 0.3 - only larger than reserved memory, master server can work. 
default value : physical memory * 1/10, unit is G. - - - - - master.listen.port - 5678 - - int - - master listen port + only larger than reserved memory, master server can work. default value 0.3, the unit is G \ No newline at end of file diff --git a/ambari_plugin/common-services/DOLPHIN/1.3.5/configuration/dolphin-worker.xml b/ambari_plugin/common-services/DOLPHIN/1.3.5/configuration/dolphin-worker.xml index 1ae7a1a765..ced9aab203 100644 --- a/ambari_plugin/common-services/DOLPHIN/1.3.5/configuration/dolphin-worker.xml +++ b/ambari_plugin/common-services/DOLPHIN/1.3.5/configuration/dolphin-worker.xml @@ -15,13 +15,22 @@ ~ limitations under the License. --> + + worker.listen.port + 1234 + + int + + worker listen port + + worker.exec.threads 100 int - worker execute thread num + worker execute thread number @@ -35,33 +44,23 @@ worker.max.cpuload.avg - 100 + -1 int - only less than cpu avg load, worker server can work. default value : the number of cpu cores * 2 + only less than cpu avg load, worker server can work. default value -1: the number of cpu cores * 2 worker.reserved.memory 0.3 - only larger than reserved memory, worker server can work. default value : physical memory * 1/10, unit is G. - - - - - worker.listen.port - 1234 - - int - - worker listen port + only larger than reserved memory, worker server can work. default value 0.3, the unit is G worker.groups default - default worker group + default worker groups separated by comma, like 'worker.groups=default,test' \ No newline at end of file diff --git a/docker/build/Dockerfile b/docker/build/Dockerfile index 06917566f8..e4f5c2ce26 100644 --- a/docker/build/Dockerfile +++ b/docker/build/Dockerfile @@ -15,23 +15,29 @@ # limitations under the License. # -FROM openjdk:8-jdk-alpine +FROM openjdk:8-jre-slim-buster ARG VERSION +ARG DEBIAN_FRONTEND=noninteractive ENV TZ Asia/Shanghai ENV LANG C.UTF-8 ENV DOCKER true # 1. install command/library/software -# If install slowly, you can replcae alpine's mirror with aliyun's mirror, Example: -# RUN sed -i "s/dl-cdn.alpinelinux.org/mirrors.aliyun.com/g" /etc/apk/repositories -# RUN sed -i 's/dl-cdn.alpinelinux.org/mirror.tuna.tsinghua.edu.cn/g' /etc/apk/repositories -RUN apk update && \ - apk add --no-cache tzdata dos2unix bash python2 python3 supervisor procps sudo shadow tini postgresql-client && \ - cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && \ - apk del tzdata && \ - rm -rf /var/cache/apk/* +# If install slowly, you can replcae debian's mirror with new mirror, Example: +# RUN { \ +# echo "deb http://mirrors.tuna.tsinghua.edu.cn/debian/ buster main contrib non-free"; \ +# echo "deb http://mirrors.tuna.tsinghua.edu.cn/debian/ buster-updates main contrib non-free"; \ +# echo "deb http://mirrors.tuna.tsinghua.edu.cn/debian/ buster-backports main contrib non-free"; \ +# echo "deb http://mirrors.tuna.tsinghua.edu.cn/debian-security buster/updates main contrib non-free"; \ +# } > /etc/apt/sources.list +RUN apt-get update && \ + apt-get install -y --no-install-recommends tzdata dos2unix python supervisor procps netcat sudo tini && \ + echo "Asia/Shanghai" > /etc/timezone && \ + rm -f /etc/localtime && \ + dpkg-reconfigure tzdata && \ + rm -rf /var/lib/apt/lists/* /tmp/* # 2. 
add dolphinscheduler ADD ./apache-dolphinscheduler-incubating-${VERSION}-dolphinscheduler-bin.tar.gz /opt/ @@ -44,19 +50,20 @@ COPY ./startup-init-conf.sh /root/startup-init-conf.sh COPY ./startup.sh /root/startup.sh COPY ./conf/dolphinscheduler/*.tpl /opt/dolphinscheduler/conf/ COPY ./conf/dolphinscheduler/logback/* /opt/dolphinscheduler/conf/ -COPY ./conf/dolphinscheduler/supervisor/supervisor.ini /etc/supervisor.d/ +COPY ./conf/dolphinscheduler/supervisor/supervisor.ini /etc/supervisor/conf.d/ COPY ./conf/dolphinscheduler/env/dolphinscheduler_env.sh.tpl /opt/dolphinscheduler/conf/env/ -RUN dos2unix /root/checkpoint.sh && \ +RUN sed -i 's/*.conf$/*.ini/' /etc/supervisor/supervisord.conf && \ + dos2unix /root/checkpoint.sh && \ dos2unix /root/startup-init-conf.sh && \ dos2unix /root/startup.sh && \ dos2unix /opt/dolphinscheduler/script/*.sh && \ dos2unix /opt/dolphinscheduler/bin/*.sh && \ rm -rf /bin/sh && \ ln -s /bin/bash /bin/sh && \ - mkdir -p /var/mail /tmp/xls && \ + mkdir -p /tmp/xls && \ echo "Set disable_coredump false" >> /etc/sudo.conf # 4. expose port EXPOSE 5678 1234 12345 50051 -ENTRYPOINT ["/sbin/tini", "--", "/root/startup.sh"] +ENTRYPOINT ["/usr/bin/tini", "--", "/root/startup.sh"] diff --git a/docker/build/README.md b/docker/build/README.md index 19d807d8d1..97957c642a 100644 --- a/docker/build/README.md +++ b/docker/build/README.md @@ -34,15 +34,15 @@ The default username is `admin` and the default password is `dolphinscheduler123 > **Tip**: For quick start in docker, you can create a tenant named `ds` and associate the user `admin` with the tenant `ds` -#### Or via Environment Variables **`DATABASE_HOST`** **`DATABASE_PORT`** **`DATABASE_DATABASE`** **`ZOOKEEPER_QUORUM`** +#### Or via Environment Variables **`DATABASE_HOST`**, **`DATABASE_PORT`**, **`ZOOKEEPER_QUORUM`** You can specify **existing postgres and zookeeper service**. 
Example: ``` $ docker run -d --name dolphinscheduler \ --e ZOOKEEPER_QUORUM="192.168.x.x:2181" \ -e DATABASE_HOST="192.168.x.x" -e DATABASE_PORT="5432" -e DATABASE_DATABASE="dolphinscheduler" \ -e DATABASE_USERNAME="test" -e DATABASE_PASSWORD="test" \ +-e ZOOKEEPER_QUORUM="192.168.x.x:2181" \ -p 12345:12345 \ apache/dolphinscheduler:latest all ``` @@ -63,9 +63,9 @@ docker volume create dolphinscheduler-resource-local ``` $ docker run -d --name dolphinscheduler-master \ --e ZOOKEEPER_QUORUM="192.168.x.x:2181" \ -e DATABASE_HOST="192.168.x.x" -e DATABASE_PORT="5432" -e DATABASE_DATABASE="dolphinscheduler" \ -e DATABASE_USERNAME="test" -e DATABASE_PASSWORD="test" \ +-e ZOOKEEPER_QUORUM="192.168.x.x:2181" \ apache/dolphinscheduler:latest master-server ``` @@ -73,9 +73,9 @@ apache/dolphinscheduler:latest master-server ``` $ docker run -d --name dolphinscheduler-worker \ --e ZOOKEEPER_QUORUM="192.168.x.x:2181" \ -e DATABASE_HOST="192.168.x.x" -e DATABASE_PORT="5432" -e DATABASE_DATABASE="dolphinscheduler" \ -e DATABASE_USERNAME="test" -e DATABASE_PASSWORD="test" \ +-e ZOOKEEPER_QUORUM="192.168.x.x:2181" \ -v dolphinscheduler-resource-local:/dolphinscheduler \ apache/dolphinscheduler:latest worker-server ``` @@ -84,9 +84,9 @@ apache/dolphinscheduler:latest worker-server ``` $ docker run -d --name dolphinscheduler-api \ --e ZOOKEEPER_QUORUM="192.168.x.x:2181" \ -e DATABASE_HOST="192.168.x.x" -e DATABASE_PORT="5432" -e DATABASE_DATABASE="dolphinscheduler" \ -e DATABASE_USERNAME="test" -e DATABASE_PASSWORD="test" \ +-e ZOOKEEPER_QUORUM="192.168.x.x:2181" \ -v dolphinscheduler-resource-local:/dolphinscheduler \ -p 12345:12345 \ apache/dolphinscheduler:latest api-server @@ -101,7 +101,7 @@ $ docker run -d --name dolphinscheduler-alert \ apache/dolphinscheduler:latest alert-server ``` -**Note**: You must be specify `DATABASE_HOST` `DATABASE_PORT` `DATABASE_DATABASE` `DATABASE_USERNAME` `DATABASE_PASSWORD` `ZOOKEEPER_QUORUM` when start a standalone dolphinscheduler server. +**Note**: You must be specify `DATABASE_HOST`, `DATABASE_PORT`, `DATABASE_DATABASE`, `DATABASE_USERNAME`, `DATABASE_PASSWORD`, `ZOOKEEPER_QUORUM` when start a standalone dolphinscheduler server. 
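To avoid repeating the same connection settings for every standalone server, they can also be collected into a Docker env file and passed with `--env-file` (a minimal sketch; the file name `common.env` and the addresses below are placeholders for your own PostgreSQL and ZooKeeper services):

```
# common.env - shared connection settings (example values)
DATABASE_HOST=192.168.x.x
DATABASE_PORT=5432
DATABASE_DATABASE=dolphinscheduler
DATABASE_USERNAME=test
DATABASE_PASSWORD=test
ZOOKEEPER_QUORUM=192.168.x.x:2181
```

```
$ docker run -d --name dolphinscheduler-master \
--env-file common.env \
apache/dolphinscheduler:latest master-server
```
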
## How to build a docker image @@ -122,10 +122,50 @@ C:\incubator-dolphinscheduler>.\docker\build\hooks\build.bat Please read `./docker/build/hooks/build` `./docker/build/hooks/build.bat` script files if you don't understand +## Support Matrix + +| Type | Support | Notes | +| ------------------------------------------------------------ | ------------ | ------------------------------------- | +| Shell | Yes | | +| Python2 | Yes | | +| Python3 | Indirect Yes | Refer to FAQ | +| Hadoop2 | Indirect Yes | Refer to FAQ | +| Hadoop3 | Not Sure | Not tested | +| Spark-Local(client) | Indirect Yes | Refer to FAQ | +| Spark-YARN(cluster) | Indirect Yes | Refer to FAQ | +| Spark-Mesos(cluster) | Not Yet | | +| Spark-Standalone(cluster) | Not Yet | | +| Spark-Kubernetes(cluster) | Not Yet | | +| Flink-Local(local>=1.11) | Not Yet | Generic CLI mode is not yet supported | +| Flink-YARN(yarn-cluster) | Indirect Yes | Refer to FAQ | +| Flink-YARN(yarn-session/yarn-per-job/yarn-application>=1.11) | Not Yet | Generic CLI mode is not yet supported | +| Flink-Mesos(default) | Not Yet | | +| Flink-Mesos(remote>=1.11) | Not Yet | Generic CLI mode is not yet supported | +| Flink-Standalone(default) | Not Yet | | +| Flink-Standalone(remote>=1.11) | Not Yet | Generic CLI mode is not yet supported | +| Flink-Kubernetes(default) | Not Yet | | +| Flink-Kubernetes(remote>=1.11) | Not Yet | Generic CLI mode is not yet supported | +| Flink-NativeKubernetes(kubernetes-session/application>=1.11) | Not Yet | Generic CLI mode is not yet supported | +| MapReduce | Indirect Yes | Refer to FAQ | +| Kerberos | Indirect Yes | Refer to FAQ | +| HTTP | Yes | | +| DataX | Indirect Yes | Refer to FAQ | +| Sqoop | Indirect Yes | Refer to FAQ | +| SQL-MySQL | Indirect Yes | Refer to FAQ | +| SQL-PostgreSQL | Yes | | +| SQL-Hive | Indirect Yes | Refer to FAQ | +| SQL-Spark | Indirect Yes | Refer to FAQ | +| SQL-ClickHouse | Indirect Yes | Refer to FAQ | +| SQL-Oracle | Indirect Yes | Refer to FAQ | +| SQL-SQLServer | Indirect Yes | Refer to FAQ | +| SQL-DB2 | Indirect Yes | Refer to FAQ | + ## Environment Variables The DolphinScheduler Docker container is configured through environment variables, and the default value will be used if an environment variable is not set. +### Database + **`DATABASE_TYPE`** This environment variable sets the type for database. The default value is `postgresql`. @@ -174,13 +214,23 @@ This environment variable sets the database for database. The default value is ` **Note**: You must be specify it when start a standalone dolphinscheduler server. Like `master-server`, `worker-server`, `api-server`, `alert-server`. -**`DOLPHINSCHEDULER_OPTS`** +### ZooKeeper -This environment variable sets jvm options for `master-server`, `worker-server`, `api-server` or `alert-server`. The default value is empty. +**`ZOOKEEPER_QUORUM`** -**`LOGGER_SERVER_OPTS`** +This environment variable sets zookeeper quorum. The default value is `127.0.0.1:2181`. + +**Note**: You must be specify it when start a standalone dolphinscheduler server. Like `master-server`, `worker-server`, `api-server`. + +**`ZOOKEEPER_ROOT`** + +This environment variable sets zookeeper root directory for dolphinscheduler. The default value is `/dolphinscheduler`. -This environment variable sets jvm options for `logger-server` (since `logger-server` is deployed with `worker-server`, it needs to be set separately). The default value is empty. 
+### Common + +**`DOLPHINSCHEDULER_OPTS`** + +This environment variable sets jvm options for dolphinscheduler, suitable for `master-server`, `worker-server`, `api-server`, `alert-server`, `logger-server`. The default value is empty. **`DATA_BASEDIR_PATH`** @@ -210,6 +260,54 @@ This environment variable sets s3 access key for resource storage. The default v This environment variable sets s3 secret key for resource storage. The default value is `xxxxxxx`. +**`HADOOP_SECURITY_AUTHENTICATION_STARTUP_STATE`** + +This environment variable sets whether to startup kerberos. The default value is `false`. + +**`JAVA_SECURITY_KRB5_CONF_PATH`** + +This environment variable sets java.security.krb5.conf path. The default value is `/opt/krb5.conf`. + +**`LOGIN_USER_KEYTAB_USERNAME`** + +This environment variable sets login user from keytab username. The default value is `hdfs@HADOOP.COM`. + +**`LOGIN_USER_KEYTAB_PATH`** + +This environment variable sets login user from keytab path. The default value is `/opt/hdfs.keytab`. + +**`KERBEROS_EXPIRE_TIME`** + +This environment variable sets kerberos expire time, the unit is hour. The default value is `2`. + +**`HDFS_ROOT_USER`** + +This environment variable sets hdfs root user when resource.storage.type=HDFS. The default value is `hdfs`. + +**`YARN_RESOURCEMANAGER_HA_RM_IDS`** + +This environment variable sets yarn resourcemanager ha rm ids. The default value is empty. + +**`YARN_APPLICATION_STATUS_ADDRESS`** + +This environment variable sets yarn application status address. The default value is `http://ds1:8088/ws/v1/cluster/apps/%s`. + +**`SKYWALKING_ENABLE`** + +This environment variable sets whether to enable skywalking. The default value is `false`. + +**`SW_AGENT_COLLECTOR_BACKEND_SERVICES`** + +This environment variable sets agent collector backend services for skywalking. The default value is `127.0.0.1:11800`. + +**`SW_GRPC_LOG_SERVER_HOST`** + +This environment variable sets grpc log server host for skywalking. The default value is `127.0.0.1`. + +**`SW_GRPC_LOG_SERVER_PORT`** + +This environment variable sets grpc log server port for skywalking. The default value is `11800`. + **`HADOOP_HOME`** This environment variable sets `HADOOP_HOME`. The default value is `/opt/soft/hadoop`. @@ -232,7 +330,7 @@ This environment variable sets `PYTHON_HOME`. The default value is `/usr/bin/pyt **`JAVA_HOME`** -This environment variable sets `JAVA_HOME`. The default value is `/usr/lib/jvm/java-1.8-openjdk`. +This environment variable sets `JAVA_HOME`. The default value is `/usr/local/openjdk-8`. **`HIVE_HOME`** @@ -246,23 +344,27 @@ This environment variable sets `FLINK_HOME`. The default value is `/opt/soft/fli This environment variable sets `DATAX_HOME`. The default value is `/opt/soft/datax`. -**`ZOOKEEPER_QUORUM`** - -This environment variable sets zookeeper quorum for `master-server` and `worker-serverr`. The default value is `127.0.0.1:2181`. - -**Note**: You must be specify it when start a standalone dolphinscheduler server. Like `master-server`, `worker-server`. +### Master Server -**`ZOOKEEPER_ROOT`** +**`MASTER_SERVER_OPTS`** -This environment variable sets zookeeper root directory for dolphinscheduler. The default value is `/dolphinscheduler`. +This environment variable sets jvm options for `master-server`. The default value is `-Xms1g -Xmx1g -Xmn512m`. **`MASTER_EXEC_THREADS`** -This environment variable sets exec thread num for `master-server`. The default value is `100`. +This environment variable sets exec thread number for `master-server`. 
The default value is `100`. **`MASTER_EXEC_TASK_NUM`** -This environment variable sets exec task num for `master-server`. The default value is `20`. +This environment variable sets exec task number for `master-server`. The default value is `20`. + +**`MASTER_DISPATCH_TASK_NUM`** + +This environment variable sets dispatch task number for `master-server`. The default value is `3`. + +**`MASTER_HOST_SELECTOR`** + +This environment variable sets host selector for `master-server`. Optional values include `Random`, `RoundRobin` and `LowerWeight`. The default value is `LowerWeight`. **`MASTER_HEARTBEAT_INTERVAL`** @@ -278,19 +380,21 @@ This environment variable sets task commit interval for `master-server`. The def **`MASTER_MAX_CPULOAD_AVG`** -This environment variable sets max cpu load avg for `master-server`. The default value is `100`. +This environment variable sets max cpu load avg for `master-server`. The default value is `-1`. **`MASTER_RESERVED_MEMORY`** -This environment variable sets reserved memory for `master-server`. The default value is `0.1`. +This environment variable sets reserved memory for `master-server`, the unit is G. The default value is `0.3`. + +### Worker Server -**`MASTER_LISTEN_PORT`** +**`WORKER_SERVER_OPTS`** -This environment variable sets port for `master-server`. The default value is `5678`. +This environment variable sets jvm options for `worker-server`. The default value is `-Xms1g -Xmx1g -Xmn512m`. **`WORKER_EXEC_THREADS`** -This environment variable sets exec thread num for `worker-server`. The default value is `100`. +This environment variable sets exec thread number for `worker-server`. The default value is `100`. **`WORKER_HEARTBEAT_INTERVAL`** @@ -298,20 +402,22 @@ This environment variable sets heartbeat interval for `worker-server`. The defau **`WORKER_MAX_CPULOAD_AVG`** -This environment variable sets max cpu load avg for `worker-server`. The default value is `100`. +This environment variable sets max cpu load avg for `worker-server`. The default value is `-1`. **`WORKER_RESERVED_MEMORY`** -This environment variable sets reserved memory for `worker-server`. The default value is `0.1`. - -**`WORKER_LISTEN_PORT`** - -This environment variable sets port for `worker-server`. The default value is `1234`. +This environment variable sets reserved memory for `worker-server`, the unit is G. The default value is `0.3`. **`WORKER_GROUPS`** This environment variable sets groups for `worker-server`. The default value is `default`. +### Alert Server + +**`ALERT_SERVER_OPTS`** + +This environment variable sets jvm options for `alert-server`. The default value is `-Xms512m -Xmx512m -Xmn256m`. + **`XLS_FILE_PATH`** This environment variable sets xls file path for `alert-server`. The default value is `/tmp/xls`. @@ -368,19 +474,31 @@ This environment variable sets enterprise wechat agent id for `alert-server`. Th This environment variable sets enterprise wechat users for `alert-server`. The default value is empty. +### Api Server + +**`API_SERVER_OPTS`** + +This environment variable sets jvm options for `api-server`. The default value is `-Xms512m -Xmx512m -Xmn256m`. + +### Logger Server + +**`LOGGER_SERVER_OPTS`** + +This environment variable sets jvm options for `logger-server`. The default value is `-Xms512m -Xmx512m -Xmn256m`. 
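For docker-compose and Docker Swarm deployments, the same variables can be centralized in `config.env.sh` instead of being passed one by one (see the FAQ below). A minimal excerpt with illustrative values only; any variable left unset falls back to the defaults listed above:

```
# config.env.sh (excerpt, illustrative values)
RESOURCE_STORAGE_TYPE=HDFS
FS_DEFAULT_FS=file:///
MASTER_SERVER_OPTS=-Xms1g -Xmx1g -Xmn512m
WORKER_GROUPS=default
ALERT_SERVER_OPTS=-Xms512m -Xmx512m -Xmn256m
```
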
+ ## Initialization scripts -If you would like to do additional initialization in an image derived from this one, add one or more environment variable under `/root/start-init-conf.sh`, and modify template files in `/opt/dolphinscheduler/conf/*.tpl`. +If you would like to do additional initialization in an image derived from this one, add one or more environment variables under `/root/start-init-conf.sh`, and modify template files in `/opt/dolphinscheduler/conf/*.tpl`. -For example, to add an environment variable `API_SERVER_PORT` in `/root/start-init-conf.sh`: +For example, to add an environment variable `SECURITY_AUTHENTICATION_TYPE` in `/root/start-init-conf.sh`: ``` -export API_SERVER_PORT=5555 +export SECURITY_AUTHENTICATION_TYPE=PASSWORD ``` -and to modify `/opt/dolphinscheduler/conf/application-api.properties.tpl` template file, add server port: +and to modify `application-api.properties.tpl` template file, add the `SECURITY_AUTHENTICATION_TYPE`: ``` -server.port=${API_SERVER_PORT} +security.authentication.type=${SECURITY_AUTHENTICATION_TYPE} ``` `/root/start-init-conf.sh` will dynamically generate config file: @@ -429,27 +547,26 @@ docker stack rm dolphinscheduler ### How to use MySQL as the DolphinScheduler's database instead of PostgreSQL? -> Because of the commercial license, we cannot directly use the driver and client of MySQL. +> Because of the commercial license, we cannot directly use the driver of MySQL. > > If you want to use MySQL, you can build a new image based on the `apache/dolphinscheduler` image as follows. 1. Download the MySQL driver [mysql-connector-java-5.1.49.jar](https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.49/mysql-connector-java-5.1.49.jar) (require `>=5.1.47`) -2. Create a new `Dockerfile` to add MySQL driver and client: +2. Create a new `Dockerfile` to add MySQL driver: ``` FROM apache/dolphinscheduler:latest COPY mysql-connector-java-5.1.49.jar /opt/dolphinscheduler/lib -RUN apk add --update --no-cache mysql-client ``` -3. Build a new docker image including MySQL driver and client: +3. Build a new docker image including MySQL driver: ``` -docker build -t apache/dolphinscheduler:mysql . +docker build -t apache/dolphinscheduler:mysql-driver . ``` -4. Modify all `image` fields to `apache/dolphinscheduler:mysql` in `docker-compose.yml` +4. Modify all `image` fields to `apache/dolphinscheduler:mysql-driver` in `docker-compose.yml` > If you want to deploy dolphinscheduler on Docker Swarm, you need modify `docker-stack.yml` @@ -457,17 +574,17 @@ docker build -t apache/dolphinscheduler:mysql . 6. Add `dolphinscheduler-mysql` service in `docker-compose.yml` (**Optional**, you can directly use a external MySQL database) -7. Modify all DATABASE environments in `docker-compose.yml` +7. 
Modify DATABASE environments in `config.env.sh` ``` -DATABASE_TYPE: mysql -DATABASE_DRIVER: com.mysql.jdbc.Driver -DATABASE_HOST: dolphinscheduler-mysql -DATABASE_PORT: 3306 -DATABASE_USERNAME: root -DATABASE_PASSWORD: root -DATABASE_DATABASE: dolphinscheduler -DATABASE_PARAMS: useUnicode=true&characterEncoding=UTF-8 +DATABASE_TYPE=mysql +DATABASE_DRIVER=com.mysql.jdbc.Driver +DATABASE_HOST=dolphinscheduler-mysql +DATABASE_PORT=3306 +DATABASE_USERNAME=root +DATABASE_PASSWORD=root +DATABASE_DATABASE=dolphinscheduler +DATABASE_PARAMS=useUnicode=true&characterEncoding=UTF-8 ``` > If you have added `dolphinscheduler-mysql` service in `docker-compose.yml`, just set `DATABASE_HOST` to `dolphinscheduler-mysql` @@ -532,4 +649,124 @@ docker build -t apache/dolphinscheduler:oracle-driver . 6. Add a Oracle datasource in `Datasource manage` +### How to support Python 2 pip and custom requirements.txt? + +1. Create a new `Dockerfile` to install pip: + +``` +FROM apache/dolphinscheduler:latest +COPY requirements.txt /tmp +RUN apt-get update && \ + apt-get install -y --no-install-recommends python-pip && \ + pip install --no-cache-dir -r /tmp/requirements.txt && \ + rm -rf /var/lib/apt/lists/* +``` + +The command will install the default **pip 18.1**. If you upgrade the pip, just add one line + +``` + pip install --no-cache-dir -U pip && \ +``` + +2. Build a new docker image including pip: + +``` +docker build -t apache/dolphinscheduler:pip . +``` + +3. Modify all `image` fields to `apache/dolphinscheduler:pip` in `docker-compose.yml` + +> If you want to deploy dolphinscheduler on Docker Swarm, you need modify `docker-stack.yml` + +4. Run a dolphinscheduler (See **How to use this docker image**) + +5. Verify pip under a new Python task + +### How to support Python 3? + +1. Create a new `Dockerfile` to install Python 3: + +``` +FROM apache/dolphinscheduler:latest +RUN apt-get update && \ + apt-get install -y --no-install-recommends python3 && \ + rm -rf /var/lib/apt/lists/* +``` + +The command will install the default **Python 3.7.3**. If you also want to install **pip3**, just replace `python3` with `python3-pip` like + +``` + apt-get install -y --no-install-recommends python3-pip && \ +``` + +2. Build a new docker image including Python 3: + +``` +docker build -t apache/dolphinscheduler:python3 . +``` + +3. Modify all `image` fields to `apache/dolphinscheduler:python3` in `docker-compose.yml` + +> If you want to deploy dolphinscheduler on Docker Swarm, you need modify `docker-stack.yml` + +4. Modify `PYTHON_HOME` to `/usr/bin/python3` in `config.env.sh` + +5. Run a dolphinscheduler (See **How to use this docker image**) + +6. Verify Python 3 under a new Python task + +### How to support Hadoop, Spark, Flink, Hive or DataX? + +Take Spark 2.4.7 as an example: + +1. Download the Spark 2.4.7 release binary `spark-2.4.7-bin-hadoop2.7.tgz` + +2. Run a dolphinscheduler (See **How to use this docker image**) + +3. Copy the Spark 2.4.7 release binary into Docker container + +```bash +docker cp spark-2.4.7-bin-hadoop2.7.tgz dolphinscheduler-worker:/opt/soft +``` + +Because the volume `dolphinscheduler-shared-local` is mounted on `/opt/soft`, all files in `/opt/soft` will not be lost + +4. 
Attach the container and ensure that `SPARK_HOME2` exists + +```bash +docker exec -it dolphinscheduler-worker bash +cd /opt/soft +tar zxf spark-2.4.7-bin-hadoop2.7.tgz +rm -f spark-2.4.7-bin-hadoop2.7.tgz +ln -s spark-2.4.7-bin-hadoop2.7 spark2 # or just mv +$SPARK_HOME2/bin/spark-submit --version +``` + +The last command will print Spark version if everything goes well + +5. Verify Spark under a Shell task + +``` +$SPARK_HOME2/bin/spark-submit --class org.apache.spark.examples.SparkPi $SPARK_HOME2/examples/jars/spark-examples_2.11-2.4.7.jar +``` + +Check whether the task log contains the output like `Pi is roughly 3.146015` + +6. Verify Spark under a Spark task + +The file `spark-examples_2.11-2.4.7.jar` needs to be uploaded to the resources first, and then create a Spark task with: + +- Spark Version: `SPARK2` +- Main Class: `org.apache.spark.examples.SparkPi` +- Main Package: `spark-examples_2.11-2.4.7.jar` +- Deploy Mode: `local` + +Similarly, check whether the task log contains the output like `Pi is roughly 3.146015` + +7. Verify Spark on YARN + +Spark on YARN (Deploy Mode is `cluster` or `client`) requires Hadoop support. Similar to Spark support, the operation of supporting Hadoop is almost the same as the previous steps + +Ensure that `$HADOOP_HOME` and `$HADOOP_CONF_DIR` exists + For more information please refer to the [incubator-dolphinscheduler](https://github.com/apache/incubator-dolphinscheduler.git) documentation. diff --git a/docker/build/README_zh_CN.md b/docker/build/README_zh_CN.md index 0ea3dc21b6..309e46285e 100644 --- a/docker/build/README_zh_CN.md +++ b/docker/build/README_zh_CN.md @@ -34,15 +34,15 @@ $ docker-compose -f ./docker/docker-swarm/docker-compose.yml up -d > **提示**: 为了在docker中快速开始,你可以创建一个名为`ds`的租户,并将这个租户`ds`关联到用户`admin` -#### 或者通过环境变量 **`DATABASE_HOST`** **`DATABASE_PORT`** **`ZOOKEEPER_QUORUM`** 使用已存在的服务 +#### 或者通过环境变量 **`DATABASE_HOST`**, **`DATABASE_PORT`**, **`ZOOKEEPER_QUORUM`** 你可以指定已经存在的 **`Postgres`** 和 **`Zookeeper`** 服务. 
如下: ``` $ docker run -d --name dolphinscheduler \ --e ZOOKEEPER_QUORUM="192.168.x.x:2181" \ -e DATABASE_HOST="192.168.x.x" -e DATABASE_PORT="5432" -e DATABASE_DATABASE="dolphinscheduler" \ -e DATABASE_USERNAME="test" -e DATABASE_PASSWORD="test" \ +-e ZOOKEEPER_QUORUM="192.168.x.x:2181" \ -p 12345:12345 \ apache/dolphinscheduler:latest all ``` @@ -63,9 +63,9 @@ docker volume create dolphinscheduler-resource-local ``` $ docker run -d --name dolphinscheduler-master \ --e ZOOKEEPER_QUORUM="192.168.x.x:2181" \ -e DATABASE_HOST="192.168.x.x" -e DATABASE_PORT="5432" -e DATABASE_DATABASE="dolphinscheduler" \ -e DATABASE_USERNAME="test" -e DATABASE_PASSWORD="test" \ +-e ZOOKEEPER_QUORUM="192.168.x.x:2181" \ apache/dolphinscheduler:latest master-server ``` @@ -73,9 +73,9 @@ apache/dolphinscheduler:latest master-server ``` $ docker run -d --name dolphinscheduler-worker \ --e ZOOKEEPER_QUORUM="192.168.x.x:2181" \ -e DATABASE_HOST="192.168.x.x" -e DATABASE_PORT="5432" -e DATABASE_DATABASE="dolphinscheduler" \ -e DATABASE_USERNAME="test" -e DATABASE_PASSWORD="test" \ +-e ZOOKEEPER_QUORUM="192.168.x.x:2181" \ -v dolphinscheduler-resource-local:/dolphinscheduler \ apache/dolphinscheduler:latest worker-server ``` @@ -84,9 +84,9 @@ apache/dolphinscheduler:latest worker-server ``` $ docker run -d --name dolphinscheduler-api \ --e ZOOKEEPER_QUORUM="192.168.x.x:2181" \ -e DATABASE_HOST="192.168.x.x" -e DATABASE_PORT="5432" -e DATABASE_DATABASE="dolphinscheduler" \ -e DATABASE_USERNAME="test" -e DATABASE_PASSWORD="test" \ +-e ZOOKEEPER_QUORUM="192.168.x.x:2181" \ -v dolphinscheduler-resource-local:/dolphinscheduler \ -p 12345:12345 \ apache/dolphinscheduler:latest api-server @@ -101,7 +101,7 @@ $ docker run -d --name dolphinscheduler-alert \ apache/dolphinscheduler:latest alert-server ``` -**注意**: 当你运行dolphinscheduler中的部分服务时,你必须指定这些环境变量 `DATABASE_HOST` `DATABASE_PORT` `DATABASE_DATABASE` `DATABASE_USERNAME` `DATABASE_PASSWORD` `ZOOKEEPER_QUORUM`。 +**注意**: 当你运行dolphinscheduler中的部分服务时,你必须指定这些环境变量 `DATABASE_HOST`, `DATABASE_PORT`, `DATABASE_DATABASE`, `DATABASE_USERNAME`, `DATABASE_PASSWORD`, `ZOOKEEPER_QUORUM`。 ## 如何构建一个docker镜像 @@ -122,10 +122,50 @@ C:\incubator-dolphinscheduler>.\docker\build\hooks\build.bat 如果你不理解这些脚本 `./docker/build/hooks/build` `./docker/build/hooks/build.bat`,请阅读里面的内容。 +## 支持矩阵 + +| Type | 支持 | 备注 | +| ------------------------------------------------------------ | ------- | --------------------- | +| Shell | 是 | | +| Python2 | 是 | | +| Python3 | 间接支持 | 详见 FAQ | +| Hadoop2 | 间接支持 | 详见 FAQ | +| Hadoop3 | 尚未确定 | 尚未测试 | +| Spark-Local(client) | 间接支持 | 详见 FAQ | +| Spark-YARN(cluster) | 间接支持 | 详见 FAQ | +| Spark-Mesos(cluster) | 尚不 | | +| Spark-Standalone(cluster) | 尚不 | | +| Spark-Kubernetes(cluster) | 尚不 | | +| Flink-Local(local>=1.11) | 尚不 | Generic CLI 模式尚未支持 | +| Flink-YARN(yarn-cluster) | 间接支持 | 详见 FAQ | +| Flink-YARN(yarn-session/yarn-per-job/yarn-application>=1.11) | 尚不 | Generic CLI 模式尚未支持 | +| Flink-Mesos(default) | 尚不 | | +| Flink-Mesos(remote>=1.11) | 尚不 | Generic CLI 模式尚未支持 | +| Flink-Standalone(default) | 尚不 | | +| Flink-Standalone(remote>=1.11) | 尚不 | Generic CLI 模式尚未支持 | +| Flink-Kubernetes(default) | 尚不 | | +| Flink-Kubernetes(remote>=1.11) | 尚不 | Generic CLI 模式尚未支持 | +| Flink-NativeKubernetes(kubernetes-session/application>=1.11) | 尚不 | Generic CLI 模式尚未支持 | +| MapReduce | 间接支持 | 详见 FAQ | +| Kerberos | 间接支持 | 详见 FAQ | +| HTTP | 是 | | +| DataX | 间接支持 | 详见 FAQ | +| Sqoop | 间接支持 | 详见 FAQ | +| SQL-MySQL | 间接支持 | 详见 FAQ | +| SQL-PostgreSQL | 是 | | +| SQL-Hive | 间接支持 | 详见 FAQ | +| 
SQL-Spark | 间接支持 | 详见 FAQ | +| SQL-ClickHouse | 间接支持 | 详见 FAQ | +| SQL-Oracle | 间接支持 | 详见 FAQ | +| SQL-SQLServer | 间接支持 | 详见 FAQ | +| SQL-DB2 | 间接支持 | 详见 FAQ | + ## 环境变量 DolphinScheduler Docker 容器通过环境变量进行配置,缺省时将会使用默认值 +### 数据库 + **`DATABASE_TYPE`** 配置`database`的`TYPE`, 默认值 `postgresql`。 @@ -174,13 +214,23 @@ DolphinScheduler Docker 容器通过环境变量进行配置,缺省时将会 **注意**: 当运行`dolphinscheduler`中`master-server`、`worker-server`、`api-server`、`alert-server`这些服务时,必须指定这个环境变量,以便于你更好的搭建分布式服务。 -**`DOLPHINSCHEDULER_OPTS`** +### ZooKeeper -配置`master-server`、`worker-server`、`api-server`或`alert-server`的`jvm options`,默认值 `""`、 +**`ZOOKEEPER_QUORUM`** -**`LOGGER_SERVER_OPTS`** +配置`dolphinscheduler`的`Zookeeper`地址, 默认值 `127.0.0.1:2181`。 + +**注意**: 当运行`dolphinscheduler`中`master-server`、`worker-server`、`api-server`这些服务时,必须指定这个环境变量,以便于你更好的搭建分布式服务。 + +**`ZOOKEEPER_ROOT`** + +配置`dolphinscheduler`在`zookeeper`中数据存储的根目录,默认值 `/dolphinscheduler`。 + +### 通用 -配置`logger-server`的`jvm options`(由于`logger-server`和`worker-server`共同部署,因此它需要单独设置),默认值 `""`、 +**`DOLPHINSCHEDULER_OPTS`** + +配置`dolphinscheduler`的`jvm options`,适用于`master-server`、`worker-server`、`api-server`、`alert-server`、`logger-server`,默认值 `""`、 **`DATA_BASEDIR_PATH`** @@ -210,6 +260,54 @@ DolphinScheduler Docker 容器通过环境变量进行配置,缺省时将会 当`RESOURCE_STORAGE_TYPE=S3`时,需要配置`S3`的`s3 secret key`,默认值 `xxxxxxx`。 +**`HADOOP_SECURITY_AUTHENTICATION_STARTUP_STATE`** + +配置`dolphinscheduler`是否启用kerberos,默认值 `false`。 + +**`JAVA_SECURITY_KRB5_CONF_PATH`** + +配置`dolphinscheduler`的java.security.krb5.conf路径,默认值 `/opt/krb5.conf`。 + +**`LOGIN_USER_KEYTAB_USERNAME`** + +配置`dolphinscheduler`登录用户的keytab用户名,默认值 `hdfs@HADOOP.COM`。 + +**`LOGIN_USER_KEYTAB_PATH`** + +配置`dolphinscheduler`登录用户的keytab路径,默认值 `/opt/hdfs.keytab`。 + +**`KERBEROS_EXPIRE_TIME`** + +配置`dolphinscheduler`的kerberos过期时间,单位为小时,默认值 `2`。 + +**`HDFS_ROOT_USER`** + +当`RESOURCE_STORAGE_TYPE=HDFS`时,配置`dolphinscheduler`的hdfs的root用户名,默认值 `hdfs`。 + +**`YARN_RESOURCEMANAGER_HA_RM_IDS`** + +配置`dolphinscheduler`的yarn resourcemanager ha rm ids,默认值 `空`。 + +**`YARN_APPLICATION_STATUS_ADDRESS`** + +配置`dolphinscheduler`的yarn application status地址,默认值 `http://ds1:8088/ws/v1/cluster/apps/%s`。 + +**`SKYWALKING_ENABLE`** + +配置`skywalking`是否启用. 默认值 `false`。 + +**`SW_AGENT_COLLECTOR_BACKEND_SERVICES`** + +配置`skywalking`的collector后端地址. 默认值 `127.0.0.1:11800`。 + +**`SW_GRPC_LOG_SERVER_HOST`** + +配置`skywalking`的grpc服务主机或IP. 默认值 `127.0.0.1`。 + +**`SW_GRPC_LOG_SERVER_PORT`** + +配置`skywalking`的grpc服务端口. 
默认值 `11800`。 + **`HADOOP_HOME`** 配置`dolphinscheduler`的`HADOOP_HOME`,默认值 `/opt/soft/hadoop`。 @@ -232,7 +330,7 @@ DolphinScheduler Docker 容器通过环境变量进行配置,缺省时将会 **`JAVA_HOME`** -配置`dolphinscheduler`的`JAVA_HOME`,默认值 `/usr/lib/jvm/java-1。8-openjdk`。 +配置`dolphinscheduler`的`JAVA_HOME`,默认值 `/usr/local/openjdk-8`。 **`HIVE_HOME`** @@ -246,15 +344,11 @@ DolphinScheduler Docker 容器通过环境变量进行配置,缺省时将会 配置`dolphinscheduler`的`DATAX_HOME`,默认值 `/opt/soft/datax`。 -**`ZOOKEEPER_QUORUM`** - -配置`master-server`和`worker-serverr`的`Zookeeper`地址, 默认值 `127.0.0.1:2181`。 +### Master Server -**注意**: 当运行`dolphinscheduler`中`master-server`、`worker-server`这些服务时,必须指定这个环境变量,以便于你更好的搭建分布式服务。 +**`MASTER_SERVER_OPTS`** -**`ZOOKEEPER_ROOT`** - -配置`dolphinscheduler`在`zookeeper`中数据存储的根目录,默认值 `/dolphinscheduler`。 +配置`master-server`的`jvm options`,默认值 `-Xms1g -Xmx1g -Xmn512m`。 **`MASTER_EXEC_THREADS`** @@ -264,6 +358,14 @@ DolphinScheduler Docker 容器通过环境变量进行配置,缺省时将会 配置`master-server`中的执行任务数量,默认值 `20`。 +**`MASTER_DISPATCH_TASK_NUM`** + +配置`master-server`中的派发任务数量,默认值 `3`。 + +**`MASTER_HOST_SELECTOR`** + +配置`master-server`中派发任务时worker host的选择器,可选值为`Random`, `RoundRobin`和`LowerWeight`,默认值 `LowerWeight`。 + **`MASTER_HEARTBEAT_INTERVAL`** 配置`master-server`中的心跳交互时间,默认值 `10`。 @@ -278,15 +380,17 @@ DolphinScheduler Docker 容器通过环境变量进行配置,缺省时将会 **`MASTER_MAX_CPULOAD_AVG`** -配置`master-server`中的CPU中的`load average`值,默认值 `100`。 +配置`master-server`中的CPU中的`load average`值,默认值 `-1`。 **`MASTER_RESERVED_MEMORY`** -配置`master-server`的保留内存,默认值 `0.1`。 +配置`master-server`的保留内存,单位为G,默认值 `0.3`。 -**`MASTER_LISTEN_PORT`** +### Worker Server -配置`master-server`的端口,默认值 `5678`。 +**`WORKER_SERVER_OPTS`** + +配置`worker-server`的`jvm options`,默认值 `-Xms1g -Xmx1g -Xmn512m`。 **`WORKER_EXEC_THREADS`** @@ -298,20 +402,22 @@ DolphinScheduler Docker 容器通过环境变量进行配置,缺省时将会 **`WORKER_MAX_CPULOAD_AVG`** -配置`worker-server`中的CPU中的最大`load average`值,默认值 `100`。 +配置`worker-server`中的CPU中的最大`load average`值,默认值 `-1`。 **`WORKER_RESERVED_MEMORY`** -配置`worker-server`的保留内存,默认值 `0.1`。 - -**`WORKER_LISTEN_PORT`** - -配置`worker-server`的端口,默认值 `1234`。 +配置`worker-server`的保留内存,单位为G,默认值 `0.3`。 **`WORKER_GROUPS`** 配置`worker-server`的分组,默认值 `default`。 +### Alert Server + +**`ALERT_SERVER_OPTS`** + +配置`alert-server`的`jvm options`,默认值 `-Xms512m -Xmx512m -Xmn256m`。 + **`XLS_FILE_PATH`** 配置`alert-server`的`XLS`文件的存储路径,默认值 `/tmp/xls`。 @@ -368,19 +474,31 @@ DolphinScheduler Docker 容器通过环境变量进行配置,缺省时将会 配置`alert-server`的邮件服务企业微信`USERS`,默认值 `空`。 +### Api Server + +**`API_SERVER_OPTS`** + +配置`api-server`的`jvm options`,默认值 `-Xms512m -Xmx512m -Xmn256m`。 + +### Logger Server + +**`LOGGER_SERVER_OPTS`** + +配置`logger-server`的`jvm options`,默认值 `-Xms512m -Xmx512m -Xmn256m`。 + ## 初始化脚本 如果你想在编译的时候或者运行的时候附加一些其它的操作及新增一些环境变量,你可以在`/root/start-init-conf.sh`文件中进行修改,同时如果涉及到配置文件的修改,请在`/opt/dolphinscheduler/conf/*.tpl`中修改相应的配置文件 -例如,在`/root/start-init-conf.sh`添加一个环境变量`API_SERVER_PORT`: +例如,在`/root/start-init-conf.sh`添加一个环境变量`SECURITY_AUTHENTICATION_TYPE`: ``` -export API_SERVER_PORT=5555 +export SECURITY_AUTHENTICATION_TYPE=PASSWORD ``` -当添加以上环境变量后,你应该在相应的模板文件`/opt/dolphinscheduler/conf/application-api.properties.tpl`中添加这个环境变量配置: +当添加以上环境变量后,你应该在相应的模板文件`application-api.properties.tpl`中添加这个环境变量配置: ``` -server.port=${API_SERVER_PORT} +security.authentication.type=${SECURITY_AUTHENTICATION_TYPE} ``` `/root/start-init-conf.sh`将根据模板文件动态的生成配置文件: @@ -429,27 +547,26 @@ docker stack rm dolphinscheduler ### 如何用 MySQL 替代 PostgreSQL 作为 DolphinScheduler 的数据库? -> 由于商业许可证的原因,我们不能直接使用 MySQL 的驱动包和客户端. +> 由于商业许可证的原因,我们不能直接使用 MySQL 的驱动包. 
> > 如果你要使用 MySQL, 你可以基于官方镜像 `apache/dolphinscheduler` 进行构建. 1. 下载 MySQL 驱动包 [mysql-connector-java-5.1.49.jar](https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.49/mysql-connector-java-5.1.49.jar) (要求 `>=5.1.47`) -2. 创建一个新的 `Dockerfile`,用于添加 MySQL 的驱动包和客户端: +2. 创建一个新的 `Dockerfile`,用于添加 MySQL 的驱动包: ``` FROM apache/dolphinscheduler:latest COPY mysql-connector-java-5.1.49.jar /opt/dolphinscheduler/lib -RUN apk add --update --no-cache mysql-client ``` -3. 构建一个包含 MySQL 的驱动包和客户端的新镜像: +3. 构建一个包含 MySQL 驱动包的新镜像: ``` -docker build -t apache/dolphinscheduler:mysql . +docker build -t apache/dolphinscheduler:mysql-driver . ``` -4. 修改 `docker-compose.yml` 文件中的所有 image 字段为 `apache/dolphinscheduler:mysql` +4. 修改 `docker-compose.yml` 文件中的所有 image 字段为 `apache/dolphinscheduler:mysql-driver` > 如果你想在 Docker Swarm 上部署 dolphinscheduler,你需要修改 `docker-stack.yml` @@ -457,17 +574,17 @@ docker build -t apache/dolphinscheduler:mysql . 6. 在 `docker-compose.yml` 文件中添加 `dolphinscheduler-mysql` 服务(**可选**,你可以直接使用一个外部的 MySQL 数据库) -7. 修改 `docker-compose.yml` 文件中的所有 DATABASE 环境变量 +7. 修改 `config.env.sh` 文件中的 DATABASE 环境变量 ``` -DATABASE_TYPE: mysql -DATABASE_DRIVER: com.mysql.jdbc.Driver -DATABASE_HOST: dolphinscheduler-mysql -DATABASE_PORT: 3306 -DATABASE_USERNAME: root -DATABASE_PASSWORD: root -DATABASE_DATABASE: dolphinscheduler -DATABASE_PARAMS: useUnicode=true&characterEncoding=UTF-8 +DATABASE_TYPE=mysql +DATABASE_DRIVER=com.mysql.jdbc.Driver +DATABASE_HOST=dolphinscheduler-mysql +DATABASE_PORT=3306 +DATABASE_USERNAME=root +DATABASE_PASSWORD=root +DATABASE_DATABASE=dolphinscheduler +DATABASE_PARAMS=useUnicode=true&characterEncoding=UTF-8 ``` > 如果你已经添加了 `dolphinscheduler-mysql` 服务,设置 `DATABASE_HOST` 为 `dolphinscheduler-mysql` 即可 @@ -495,7 +612,7 @@ COPY mysql-connector-java-5.1.49.jar /opt/dolphinscheduler/lib docker build -t apache/dolphinscheduler:mysql-driver . ``` -4. 将 `docker-compose.yml` 文件中的所有 image 字段 修改为 `apache/dolphinscheduler:mysql-driver` +4. 将 `docker-compose.yml` 文件中的所有 `image` 字段修改为 `apache/dolphinscheduler:mysql-driver` > 如果你想在 Docker Swarm 上部署 dolphinscheduler,你需要修改 `docker-stack.yml` @@ -524,7 +641,7 @@ COPY ojdbc8-19.9.0.0.jar /opt/dolphinscheduler/lib docker build -t apache/dolphinscheduler:oracle-driver . ``` -4. 将 `docker-compose.yml` 文件中的所有 image 字段 修改为 `apache/dolphinscheduler:oracle-driver` +4. 将 `docker-compose.yml` 文件中的所有 `image` 字段修改为 `apache/dolphinscheduler:oracle-driver` > 如果你想在 Docker Swarm 上部署 dolphinscheduler,你需要修改 `docker-stack.yml` @@ -532,4 +649,124 @@ docker build -t apache/dolphinscheduler:oracle-driver . 6. 在数据源中心添加一个 Oracle 数据源 +### 如何支持 Python 2 pip 以及自定义 requirements.txt? + +1. 创建一个新的 `Dockerfile`,用于安装 pip: + +``` +FROM apache/dolphinscheduler:latest +COPY requirements.txt /tmp +RUN apt-get update && \ + apt-get install -y --no-install-recommends python-pip && \ + pip install --no-cache-dir -r /tmp/requirements.txt && \ + rm -rf /var/lib/apt/lists/* +``` + +这个命令会安装默认的 **pip 18.1**. 如果你想升级 pip, 只需添加一行 + +``` + pip install --no-cache-dir -U pip && \ +``` + +2. 构建一个包含 pip 的新镜像: + +``` +docker build -t apache/dolphinscheduler:pip . +``` + +3. 将 `docker-compose.yml` 文件中的所有 `image` 字段修改为 `apache/dolphinscheduler:pip` + +> 如果你想在 Docker Swarm 上部署 dolphinscheduler,你需要修改 `docker-stack.yml` + +4. 运行 dolphinscheduler (详见**如何使用docker镜像**) + +5. 在一个新 Python 任务下验证 pip + +### 如何支持 Python 3? + +1. 
创建一个新的 `Dockerfile`,用于安装 Python 3: + +``` +FROM apache/dolphinscheduler:latest +RUN apt-get update && \ + apt-get install -y --no-install-recommends python3 && \ + rm -rf /var/lib/apt/lists/* +``` + +这个命令会安装默认的 **Python 3.7.3**. 如果你也想安装 **pip3**, 将 `python3` 替换为 `python3-pip` 即可 + +``` + apt-get install -y --no-install-recommends python3-pip && \ +``` + +2. 构建一个包含 Python 3 的新镜像: + +``` +docker build -t apache/dolphinscheduler:python3 . +``` + +3. 将 `docker-compose.yml` 文件中的所有 `image` 字段修改为 `apache/dolphinscheduler:python3` + +> 如果你想在 Docker Swarm 上部署 dolphinscheduler,你需要修改 `docker-stack.yml` + +4. 修改 `config.env.sh` 文件中的 `PYTHON_HOME` 为 `/usr/bin/python3` + +5. 运行 dolphinscheduler (详见**如何使用docker镜像**) + +6. 在一个新 Python 任务下验证 Python 3 + +### 如何支持 Hadoop, Spark, Flink, Hive 或 DataX? + +以 Spark 2.4.7 为例: + +1. 下载 Spark 2.4.7 发布的二进制包 `spark-2.4.7-bin-hadoop2.7.tgz` + +2. 运行 dolphinscheduler (详见**如何使用docker镜像**) + +3. 复制 Spark 2.4.7 二进制包到 Docker 容器中 + +```bash +docker cp spark-2.4.7-bin-hadoop2.7.tgz dolphinscheduler-worker:/opt/soft +``` + +因为存储卷 `dolphinscheduler-shared-local` 被挂载到 `/opt/soft`, 因此 `/opt/soft` 中的所有文件都不会丢失 + +4. 登录到容器并确保 `SPARK_HOME2` 存在 + +```bash +docker exec -it dolphinscheduler-worker bash +cd /opt/soft +tar zxf spark-2.4.7-bin-hadoop2.7.tgz +rm -f spark-2.4.7-bin-hadoop2.7.tgz +ln -s spark-2.4.7-bin-hadoop2.7 spark2 # or just mv +$SPARK_HOME2/bin/spark-submit --version +``` + +如果一切执行正常,最后一条命令将会打印 Spark 版本信息 + +5. 在一个 Shell 任务下验证 Spark + +``` +$SPARK_HOME2/bin/spark-submit --class org.apache.spark.examples.SparkPi $SPARK_HOME2/examples/jars/spark-examples_2.11-2.4.7.jar +``` + +检查任务日志是否包含输出 `Pi is roughly 3.146015` + +6. 在一个 Spark 任务下验证 Spark + +文件 `spark-examples_2.11-2.4.7.jar` 需要先被上传到资源中心,然后创建一个 Spark 任务并设置: + +- Spark版本: `SPARK2` +- 主函数的Class: `org.apache.spark.examples.SparkPi` +- 主程序包: `spark-examples_2.11-2.4.7.jar` +- 部署方式: `local` + +同样地, 检查任务日志是否包含输出 `Pi is roughly 3.146015` + +7. 验证 Spark on YARN + +Spark on YARN (部署方式为 `cluster` 或 `client`) 需要 Hadoop 支持. 类似于 Spark 支持, 支持 Hadoop 的操作几乎和前面的步骤相同 + +确保 `$HADOOP_HOME` 和 `$HADOOP_CONF_DIR` 存在 + 更多信息请查看 [incubator-dolphinscheduler](https://github.com/apache/incubator-dolphinscheduler.git) 文档. diff --git a/docker/build/conf/dolphinscheduler/common.properties.tpl b/docker/build/conf/dolphinscheduler/common.properties.tpl index e272c21f06..83f730791d 100644 --- a/docker/build/conf/dolphinscheduler/common.properties.tpl +++ b/docker/build/conf/dolphinscheduler/common.properties.tpl @@ -15,34 +15,37 @@ # limitations under the License. # -# resource storage type : HDFS, S3, NONE +# user data local directory path, please make sure the directory exists and have read write permissions +data.basedir.path=${DATA_BASEDIR_PATH} + +# resource storage type: HDFS, S3, NONE resource.storage.type=${RESOURCE_STORAGE_TYPE} -# resource store on HDFS/S3 path, resource file will store to this hadoop hdfs path, self configuration, please make sure the directory exists on hdfs and have read write permissions。"/dolphinscheduler" is recommended +# resource store on HDFS/S3 path, resource file will store to this hadoop hdfs path, self configuration, please make sure the directory exists on hdfs and have read write permissions. 
"/dolphinscheduler" is recommended resource.upload.path=${RESOURCE_UPLOAD_PATH} -# user data local directory path, please make sure the directory exists and have read write permissions -data.basedir.path=${DATA_BASEDIR_PATH} - -# whether kerberos starts -hadoop.security.authentication.startup.state=false +# whether to startup kerberos +hadoop.security.authentication.startup.state=${HADOOP_SECURITY_AUTHENTICATION_STARTUP_STATE} # java.security.krb5.conf path -java.security.krb5.conf.path=/opt/krb5.conf +java.security.krb5.conf.path=${JAVA_SECURITY_KRB5_CONF_PATH} # login user from keytab username -login.user.keytab.username=hdfs-mycluster@ESZ.COM +login.user.keytab.username=${LOGIN_USER_KEYTAB_USERNAME} # login user from keytab path -login.user.keytab.path=/opt/hdfs.headless.keytab +login.user.keytab.path=${LOGIN_USER_KEYTAB_PATH} -#resource.view.suffixs +# kerberos expire time, the unit is hour +kerberos.expire.time=${KERBEROS_EXPIRE_TIME} + +# resource view suffixs #resource.view.suffixs=txt,log,sh,bat,conf,cfg,py,java,sql,xml,hql,properties,json,yml,yaml,ini,js -# if resource.storage.type=HDFS, the user need to have permission to create directories under the HDFS root path -hdfs.root.user=hdfs +# if resource.storage.type=HDFS, the user must have the permission to create directories under the HDFS root path +hdfs.root.user=${HDFS_ROOT_USER} -# if resource.storage.type=S3, the value like: s3a://dolphinscheduler; if resource.storage.type=HDFS, When namenode HA is enabled, you need to copy core-site.xml and hdfs-site.xml to conf dir +# if resource.storage.type=S3, the value like: s3a://dolphinscheduler; if resource.storage.type=HDFS and namenode HA is enabled, you need to copy core-site.xml and hdfs-site.xml to conf dir fs.defaultFS=${FS_DEFAULT_FS} # if resource.storage.type=S3, s3 endpoint @@ -54,13 +57,14 @@ fs.s3a.access.key=${FS_S3A_ACCESS_KEY} # if resource.storage.type=S3, s3 secret key fs.s3a.secret.key=${FS_S3A_SECRET_KEY} -# if resourcemanager HA enable, please type the HA ips ; if resourcemanager is single, make this value empty -yarn.resourcemanager.ha.rm.ids=192.168.xx.xx,192.168.xx.xx +# if resourcemanager HA is enabled, please set the HA IPs; if resourcemanager is single, keep this value empty +yarn.resourcemanager.ha.rm.ids=${YARN_RESOURCEMANAGER_HA_RM_IDS} -# if resourcemanager HA enable or not use resourcemanager, please keep the default value; If resourcemanager is single, you only need to replace ds1 to actual resourcemanager hostname. -yarn.application.status.address=http://ds1:8088/ws/v1/cluster/apps/%s +# if resourcemanager HA is enabled or not use resourcemanager, please keep the default value; If resourcemanager is single, you only need to replace ds1 to actual resourcemanager hostname +yarn.application.status.address=${YARN_APPLICATION_STATUS_ADDRESS} # system env path #dolphinscheduler.env.path=env/dolphinscheduler_env.sh + +# development state development.state=false -kerberos.expire.time=1 diff --git a/docker/build/conf/dolphinscheduler/master.properties.tpl b/docker/build/conf/dolphinscheduler/master.properties.tpl index 7b07dd7d62..151117a157 100644 --- a/docker/build/conf/dolphinscheduler/master.properties.tpl +++ b/docker/build/conf/dolphinscheduler/master.properties.tpl @@ -15,14 +15,20 @@ # limitations under the License. 
# -# master execute thread num +# master listen port +#master.listen.port=5678 + +# master execute thread number master.exec.threads=${MASTER_EXEC_THREADS} # master execute task number in parallel master.exec.task.num=${MASTER_EXEC_TASK_NUM} # master dispatch task number -#master.dispatch.task.num=3 +master.dispatch.task.num=${MASTER_DISPATCH_TASK_NUM} + +# master host selector to select a suitable worker, default value: LowerWeight. Optional values include Random, RoundRobin, LowerWeight +master.host.selector=${MASTER_HOST_SELECTOR} # master heartbeat interval master.heartbeat.interval=${MASTER_HEARTBEAT_INTERVAL} @@ -33,11 +39,8 @@ master.task.commit.retryTimes=${MASTER_TASK_COMMIT_RETRYTIMES} # master commit task interval master.task.commit.interval=${MASTER_TASK_COMMIT_INTERVAL} -# only less than cpu avg load, master server can work. default value : the number of cpu cores * 2 +# only less than cpu avg load, master server can work. default value -1: the number of cpu cores * 2 master.max.cpuload.avg=${MASTER_MAX_CPULOAD_AVG} -# only larger than reserved memory, master server can work. default value : physical memory * 1/10, unit is G. +# only larger than reserved memory, master server can work. default value 0.3, the unit is G master.reserved.memory=${MASTER_RESERVED_MEMORY} - -# master listen port -master.listen.port=${MASTER_LISTEN_PORT} diff --git a/docker/build/conf/dolphinscheduler/worker.properties.tpl b/docker/build/conf/dolphinscheduler/worker.properties.tpl index eb85f62406..b021cbfe1a 100644 --- a/docker/build/conf/dolphinscheduler/worker.properties.tpl +++ b/docker/build/conf/dolphinscheduler/worker.properties.tpl @@ -15,7 +15,10 @@ # limitations under the License. # -# worker execute thread num +# worker listener port +#worker.listen.port=1234 + +# worker execute thread number worker.exec.threads=${WORKER_EXEC_THREADS} # worker heartbeat interval @@ -24,11 +27,8 @@ worker.heartbeat.interval=${WORKER_HEARTBEAT_INTERVAL} # only less than cpu avg load, worker server can work. default value -1: the number of cpu cores * 2 worker.max.cpuload.avg=${WORKER_MAX_CPULOAD_AVG} -# only larger than reserved memory, worker server can work. default value : physical memory * 1/6, unit is G. +# only larger than reserved memory, worker server can work. 
default value 0.3, the unit is G worker.reserved.memory=${WORKER_RESERVED_MEMORY} -# worker listener port -worker.listen.port=${WORKER_LISTEN_PORT} - -# default worker groups, if this worker belongs different groups, you can config the following like that 'worker.groups=default,test' +# default worker groups separated by comma, like 'worker.groups=default,test' worker.groups=${WORKER_GROUPS} diff --git a/docker/build/startup-init-conf.sh b/docker/build/startup-init-conf.sh index d615281b14..0a21633391 100755 --- a/docker/build/startup-init-conf.sh +++ b/docker/build/startup-init-conf.sh @@ -23,7 +23,7 @@ echo "init env variables" # Define parameters default value #============================================================================ -# Database Source +# Database #============================================================================ export DATABASE_TYPE=${DATABASE_TYPE:-"postgresql"} export DATABASE_DRIVER=${DATABASE_DRIVER:-"org.postgresql.Driver"} @@ -34,12 +34,17 @@ export DATABASE_PASSWORD=${DATABASE_PASSWORD:-"root"} export DATABASE_DATABASE=${DATABASE_DATABASE:-"dolphinscheduler"} export DATABASE_PARAMS=${DATABASE_PARAMS:-"characterEncoding=utf8"} +#============================================================================ +# ZooKeeper +#============================================================================ +export ZOOKEEPER_QUORUM=${ZOOKEEPER_QUORUM:-"127.0.0.1:2181"} +export ZOOKEEPER_ROOT=${ZOOKEEPER_ROOT:-"/dolphinscheduler"} + #============================================================================ # Common #============================================================================ -# dolphinscheduler env +# common opts export DOLPHINSCHEDULER_OPTS=${DOLPHINSCHEDULER_OPTS:-""} -export LOGGER_SERVER_OPTS=${LOGGER_SERVER_OPTS:-""} # common env export DATA_BASEDIR_PATH=${DATA_BASEDIR_PATH:-"/tmp/dolphinscheduler"} export RESOURCE_STORAGE_TYPE=${RESOURCE_STORAGE_TYPE:-"HDFS"} @@ -48,48 +53,58 @@ export FS_DEFAULT_FS=${FS_DEFAULT_FS:-"file:///"} export FS_S3A_ENDPOINT=${FS_S3A_ENDPOINT:-"s3.xxx.amazonaws.com"} export FS_S3A_ACCESS_KEY=${FS_S3A_ACCESS_KEY:-"xxxxxxx"} export FS_S3A_SECRET_KEY=${FS_S3A_SECRET_KEY:-"xxxxxxx"} -# dolphinscheduler task env +export HADOOP_SECURITY_AUTHENTICATION_STARTUP_STATE=${HADOOP_SECURITY_AUTHENTICATION_STARTUP_STATE:-"false"} +export JAVA_SECURITY_KRB5_CONF_PATH=${JAVA_SECURITY_KRB5_CONF_PATH:-"/opt/krb5.conf"} +export LOGIN_USER_KEYTAB_USERNAME=${LOGIN_USER_KEYTAB_USERNAME:-"hdfs@HADOOP.COM"} +export LOGIN_USER_KEYTAB_PATH=${LOGIN_USER_KEYTAB_PATH:-"/opt/hdfs.keytab"} +export KERBEROS_EXPIRE_TIME=${KERBEROS_EXPIRE_TIME:-"2"} +export HDFS_ROOT_USER=${HDFS_ROOT_USER:-"hdfs"} +export YARN_RESOURCEMANAGER_HA_RM_IDS=${YARN_RESOURCEMANAGER_HA_RM_IDS:-""} +export YARN_APPLICATION_STATUS_ADDRESS=${YARN_APPLICATION_STATUS_ADDRESS:-"http://ds1:8088/ws/v1/cluster/apps/%s"} +# skywalking +export SKYWALKING_ENABLE=${SKYWALKING_ENABLE:-"false"} +export SW_AGENT_COLLECTOR_BACKEND_SERVICES=${SW_AGENT_COLLECTOR_BACKEND_SERVICES:-"127.0.0.1:11800"} +export SW_GRPC_LOG_SERVER_HOST=${SW_GRPC_LOG_SERVER_HOST:-"127.0.0.1"} +export SW_GRPC_LOG_SERVER_PORT=${SW_GRPC_LOG_SERVER_PORT:-"11800"} +# dolphinscheduler env export HADOOP_HOME=${HADOOP_HOME:-"/opt/soft/hadoop"} export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/opt/soft/hadoop/etc/hadoop"} export SPARK_HOME1=${SPARK_HOME1:-"/opt/soft/spark1"} export SPARK_HOME2=${SPARK_HOME2:-"/opt/soft/spark2"} export PYTHON_HOME=${PYTHON_HOME:-"/usr/bin/python"} -export 
JAVA_HOME=${JAVA_HOME:-"/usr/lib/jvm/java-1.8-openjdk"} +export JAVA_HOME=${JAVA_HOME:-"/usr/local/openjdk-8"} export HIVE_HOME=${HIVE_HOME:-"/opt/soft/hive"} export FLINK_HOME=${FLINK_HOME:-"/opt/soft/flink"} export DATAX_HOME=${DATAX_HOME:-"/opt/soft/datax"} -#============================================================================ -# Zookeeper -#============================================================================ -export ZOOKEEPER_QUORUM=${ZOOKEEPER_QUORUM:-"127.0.0.1:2181"} -export ZOOKEEPER_ROOT=${ZOOKEEPER_ROOT:-"/dolphinscheduler"} - #============================================================================ # Master Server #============================================================================ +export MASTER_SERVER_OPTS=${MASTER_SERVER_OPTS:-"-Xms1g -Xmx1g -Xmn512m"} export MASTER_EXEC_THREADS=${MASTER_EXEC_THREADS:-"100"} export MASTER_EXEC_TASK_NUM=${MASTER_EXEC_TASK_NUM:-"20"} +export MASTER_DISPATCH_TASK_NUM=${MASTER_DISPATCH_TASK_NUM:-"3"} +export MASTER_HOST_SELECTOR=${MASTER_HOST_SELECTOR:-"LowerWeight"} export MASTER_HEARTBEAT_INTERVAL=${MASTER_HEARTBEAT_INTERVAL:-"10"} export MASTER_TASK_COMMIT_RETRYTIMES=${MASTER_TASK_COMMIT_RETRYTIMES:-"5"} export MASTER_TASK_COMMIT_INTERVAL=${MASTER_TASK_COMMIT_INTERVAL:-"1000"} -export MASTER_MAX_CPULOAD_AVG=${MASTER_MAX_CPULOAD_AVG:-"100"} -export MASTER_RESERVED_MEMORY=${MASTER_RESERVED_MEMORY:-"0.1"} -export MASTER_LISTEN_PORT=${MASTER_LISTEN_PORT:-"5678"} +export MASTER_MAX_CPULOAD_AVG=${MASTER_MAX_CPULOAD_AVG:-"-1"} +export MASTER_RESERVED_MEMORY=${MASTER_RESERVED_MEMORY:-"0.3"} #============================================================================ # Worker Server #============================================================================ +export WORKER_SERVER_OPTS=${WORKER_SERVER_OPTS:-"-Xms1g -Xmx1g -Xmn512m"} export WORKER_EXEC_THREADS=${WORKER_EXEC_THREADS:-"100"} export WORKER_HEARTBEAT_INTERVAL=${WORKER_HEARTBEAT_INTERVAL:-"10"} -export WORKER_MAX_CPULOAD_AVG=${WORKER_MAX_CPULOAD_AVG:-"100"} -export WORKER_RESERVED_MEMORY=${WORKER_RESERVED_MEMORY:-"0.1"} -export WORKER_LISTEN_PORT=${WORKER_LISTEN_PORT:-"1234"} +export WORKER_MAX_CPULOAD_AVG=${WORKER_MAX_CPULOAD_AVG:-"-1"} +export WORKER_RESERVED_MEMORY=${WORKER_RESERVED_MEMORY:-"0.3"} export WORKER_GROUPS=${WORKER_GROUPS:-"default"} #============================================================================ # Alert Server #============================================================================ +export ALERT_SERVER_OPTS=${ALERT_SERVER_OPTS:-"-Xms512m -Xmx512m -Xmn256m"} # xls file export XLS_FILE_PATH=${XLS_FILE_PATH:-"/tmp/xls"} # mail @@ -108,6 +123,16 @@ export ENTERPRISE_WECHAT_SECRET=${ENTERPRISE_WECHAT_SECRET:-""} export ENTERPRISE_WECHAT_AGENT_ID=${ENTERPRISE_WECHAT_AGENT_ID:-""} export ENTERPRISE_WECHAT_USERS=${ENTERPRISE_WECHAT_USERS:-""} +#============================================================================ +# Api Server +#============================================================================ +export API_SERVER_OPTS=${API_SERVER_OPTS:-"-Xms512m -Xmx512m -Xmn256m"} + +#============================================================================ +# Logger Server +#============================================================================ +export LOGGER_SERVER_OPTS=${LOGGER_SERVER_OPTS:-"-Xms512m -Xmx512m -Xmn256m"} + echo "generate dolphinscheduler config" ls ${DOLPHINSCHEDULER_HOME}/conf/ | grep ".tpl" | while read line; do eval "cat << EOF @@ -116,9 +141,9 @@ EOF " > ${DOLPHINSCHEDULER_HOME}/conf/${line%.*} done -# 
generate dolphinscheduler env only in docker +# generate dolphinscheduler env DOLPHINSCHEDULER_ENV_PATH=${DOLPHINSCHEDULER_HOME}/conf/env/dolphinscheduler_env.sh -if [ -z "${KUBERNETES_SERVICE_HOST}" ] && [ -r "${DOLPHINSCHEDULER_ENV_PATH}.tpl" ]; then +if [ -r "${DOLPHINSCHEDULER_ENV_PATH}.tpl" ]; then eval "cat << EOF $(cat ${DOLPHINSCHEDULER_ENV_PATH}.tpl) EOF diff --git a/docker/build/startup.sh b/docker/build/startup.sh index 934fac3ae7..ae1ed36776 100755 --- a/docker/build/startup.sh +++ b/docker/build/startup.sh @@ -27,7 +27,7 @@ export LOGGER_START_ENABLED=false # wait database waitDatabase() { - echo "test ${DATABASE_TYPE} service" + echo "try to connect ${DATABASE_TYPE} ..." while ! nc -z ${DATABASE_HOST} ${DATABASE_PORT}; do local counter=$((counter+1)) if [ $counter == 30 ]; then @@ -37,21 +37,7 @@ waitDatabase() { echo "Trying to connect to ${DATABASE_TYPE} at ${DATABASE_HOST}:${DATABASE_PORT}. Attempt $counter." sleep 5 done - - echo "connect ${DATABASE_TYPE} service" - if [ ${DATABASE_TYPE} = "mysql" ]; then - v=$(mysql -h${DATABASE_HOST} -P${DATABASE_PORT} -u${DATABASE_USERNAME} --password=${DATABASE_PASSWORD} -D ${DATABASE_DATABASE} -e "select 1" 2>&1) - if [ "$(echo ${v} | grep 'ERROR' | wc -l)" -eq 1 ]; then - echo "Error: Can't connect to database...${v}" - exit 1 - fi - else - v=$(PGPASSWORD=${DATABASE_PASSWORD} psql -h ${DATABASE_HOST} -p ${DATABASE_PORT} -U ${DATABASE_USERNAME} -d ${DATABASE_DATABASE} -tAc "select 1") - if [ "$(echo ${v} | grep 'FATAL' | wc -l)" -eq 1 ]; then - echo "Error: Can't connect to database...${v}" - exit 1 - fi - fi + echo "${DATABASE_TYPE} connection is ok" } # init database @@ -60,38 +46,9 @@ initDatabase() { ${DOLPHINSCHEDULER_HOME}/script/create-dolphinscheduler.sh } -# check ds version -checkDSVersion() { - if [ ${DATABASE_TYPE} = "mysql" ]; then - v=$(mysql -h${DATABASE_HOST} -P${DATABASE_PORT} -u${DATABASE_USERNAME} --password=${DATABASE_PASSWORD} -D ${DATABASE_DATABASE} -e "SELECT * FROM public.t_ds_version" 2>/dev/null) - else - v=$(PGPASSWORD=${DATABASE_PASSWORD} psql -h ${DATABASE_HOST} -p ${DATABASE_PORT} -U ${DATABASE_USERNAME} -d ${DATABASE_DATABASE} -tAc "SELECT * FROM public.t_ds_version" 2>/dev/null) - fi - if [ -n "$v" ]; then - echo "ds version: $v" - return 0 - else - return 1 - fi -} - -# check init database -checkInitDatabase() { - echo "check init database" - while ! checkDSVersion; do - local counter=$((counter+1)) - if [ $counter == 30 ]; then - echo "Error: Couldn't check init database." - exit 1 - fi - echo "Trying to check init database. Attempt $counter." - sleep 5 - done -} - # wait zk waitZK() { - echo "connect remote zookeeper" + echo "try to connect zookeeper ..." echo "${ZOOKEEPER_QUORUM}" | awk -F ',' 'BEGIN{ i=1 }{ while( i <= NF ){ print $i; i++ } }' | while read line; do while ! nc -z ${line%:*} ${line#*:}; do local counter=$((counter+1)) @@ -103,6 +60,7 @@ waitZK() { sleep 5 done done + echo "zookeeper connection is ok" } # print usage @@ -150,7 +108,6 @@ case "$1" in ;; (alert-server) waitDatabase - checkInitDatabase export ALERT_START_ENABLED=true ;; (help) diff --git a/docker/docker-swarm/config.env.sh b/docker/docker-swarm/config.env.sh new file mode 100755 index 0000000000..aac31f5445 --- /dev/null +++ b/docker/docker-swarm/config.env.sh @@ -0,0 +1,136 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +#============================================================================ +# Database +#============================================================================ +# postgresql +DATABASE_TYPE=postgresql +DATABASE_DRIVER=org.postgresql.Driver +DATABASE_HOST=dolphinscheduler-postgresql +DATABASE_PORT=5432 +DATABASE_USERNAME=root +DATABASE_PASSWORD=root +DATABASE_DATABASE=dolphinscheduler +DATABASE_PARAMS=characterEncoding=utf8 +# mysql +# DATABASE_TYPE=mysql +# DATABASE_DRIVER=com.mysql.jdbc.Driver +# DATABASE_HOST=dolphinscheduler-mysql +# DATABASE_PORT=3306 +# DATABASE_USERNAME=root +# DATABASE_PASSWORD=root +# DATABASE_DATABASE=dolphinscheduler +# DATABASE_PARAMS=useUnicode=true&characterEncoding=UTF-8 + +#============================================================================ +# ZooKeeper +#============================================================================ +ZOOKEEPER_QUORUM=dolphinscheduler-zookeeper:2181 +ZOOKEEPER_ROOT=/dolphinscheduler + +#============================================================================ +# Common +#============================================================================ +# common opts +DOLPHINSCHEDULER_OPTS= +# common env +DATA_BASEDIR_PATH=/tmp/dolphinscheduler +RESOURCE_STORAGE_TYPE=HDFS +RESOURCE_UPLOAD_PATH=/dolphinscheduler +FS_DEFAULT_FS=file:/// +FS_S3A_ENDPOINT=s3.xxx.amazonaws.com +FS_S3A_ACCESS_KEY=xxxxxxx +FS_S3A_SECRET_KEY=xxxxxxx +HADOOP_SECURITY_AUTHENTICATION_STARTUP_STATE=false +JAVA_SECURITY_KRB5_CONF_PATH=/opt/krb5.conf +LOGIN_USER_KEYTAB_USERNAME=hdfs@HADOOP.COM +LOGIN_USER_KEYTAB_PATH=/opt/hdfs.keytab +KERBEROS_EXPIRE_TIME=2 +HDFS_ROOT_USER=hdfs +YARN_RESOURCEMANAGER_HA_RM_IDS= +YARN_APPLICATION_STATUS_ADDRESS=http://ds1:8088/ws/v1/cluster/apps/%s +# skywalking +SKYWALKING_ENABLE=false +SW_AGENT_COLLECTOR_BACKEND_SERVICES=127.0.0.1:11800 +SW_GRPC_LOG_SERVER_HOST=127.0.0.1 +SW_GRPC_LOG_SERVER_PORT=11800 +# dolphinscheduler env +HADOOP_HOME=/opt/soft/hadoop +HADOOP_CONF_DIR=/opt/soft/hadoop/etc/hadoop +SPARK_HOME1=/opt/soft/spark1 +SPARK_HOME2=/opt/soft/spark2 +PYTHON_HOME=/usr/bin/python +JAVA_HOME=/usr/local/openjdk-8 +HIVE_HOME=/opt/soft/hive +FLINK_HOME=/opt/soft/flink +DATAX_HOME=/opt/soft/datax + +#============================================================================ +# Master Server +#============================================================================ +MASTER_SERVER_OPTS=-Xms1g -Xmx1g -Xmn512m +MASTER_EXEC_THREADS=100 +MASTER_EXEC_TASK_NUM=20 +MASTER_DISPATCH_TASK_NUM=3 +MASTER_HOST_SELECTOR=LowerWeight +MASTER_HEARTBEAT_INTERVAL=10 +MASTER_TASK_COMMIT_RETRYTIMES=5 +MASTER_TASK_COMMIT_INTERVAL=1000 +MASTER_MAX_CPULOAD_AVG=-1 +MASTER_RESERVED_MEMORY=0.3 + +#============================================================================ +# Worker Server +#============================================================================ +WORKER_SERVER_OPTS=-Xms1g -Xmx1g -Xmn512m +WORKER_EXEC_THREADS=100 
+WORKER_HEARTBEAT_INTERVAL=10 +WORKER_MAX_CPULOAD_AVG=-1 +WORKER_RESERVED_MEMORY=0.3 +WORKER_GROUPS=default + +#============================================================================ +# Alert Server +#============================================================================ +ALERT_SERVER_OPTS=-Xms512m -Xmx512m -Xmn256m +# xls file +XLS_FILE_PATH=/tmp/xls +# mail +MAIL_SERVER_HOST= +MAIL_SERVER_PORT= +MAIL_SENDER= +MAIL_USER= +MAIL_PASSWD= +MAIL_SMTP_STARTTLS_ENABLE=true +MAIL_SMTP_SSL_ENABLE=false +MAIL_SMTP_SSL_TRUST= +# wechat +ENTERPRISE_WECHAT_ENABLE=false +ENTERPRISE_WECHAT_CORP_ID= +ENTERPRISE_WECHAT_SECRET= +ENTERPRISE_WECHAT_AGENT_ID= +ENTERPRISE_WECHAT_USERS= + +#============================================================================ +# Api Server +#============================================================================ +API_SERVER_OPTS=-Xms512m -Xmx512m -Xmn256m + +#============================================================================ +# Logger Server +#============================================================================ +LOGGER_SERVER_OPTS=-Xms512m -Xmx512m -Xmn256m diff --git a/docker/docker-swarm/docker-compose.yml b/docker/docker-swarm/docker-compose.yml index b2c1e0d1a8..af27629506 100644 --- a/docker/docker-swarm/docker-compose.yml +++ b/docker/docker-swarm/docker-compose.yml @@ -21,8 +21,6 @@ services: dolphinscheduler-postgresql: image: bitnami/postgresql:latest container_name: dolphinscheduler-postgresql - ports: - - 5432:5432 environment: TZ: Asia/Shanghai POSTGRESQL_USERNAME: root @@ -38,8 +36,6 @@ services: dolphinscheduler-zookeeper: image: bitnami/zookeeper:latest container_name: dolphinscheduler-zookeeper - ports: - - 2181:2181 environment: TZ: Asia/Shanghai ALLOW_ANONYMOUS_LOGIN: "yes" @@ -58,20 +54,7 @@ services: - 12345:12345 environment: TZ: Asia/Shanghai - DOLPHINSCHEDULER_OPTS: "-Xms512m -Xmx512m -Xmn256m" - DATABASE_TYPE: postgresql - DATABASE_DRIVER: org.postgresql.Driver - DATABASE_HOST: dolphinscheduler-postgresql - DATABASE_PORT: 5432 - DATABASE_USERNAME: root - DATABASE_PASSWORD: root - DATABASE_DATABASE: dolphinscheduler - DATABASE_PARAMS: characterEncoding=utf8 - ZOOKEEPER_QUORUM: dolphinscheduler-zookeeper:2181 - DATA_BASEDIR_PATH: /tmp/dolphinscheduler - RESOURCE_STORAGE_TYPE: HDFS - RESOURCE_UPLOAD_PATH: /dolphinscheduler - FS_DEFAULT_FS: file:/// + env_file: config.env.sh healthcheck: test: ["CMD", "/root/checkpoint.sh", "ApiApplicationServer"] interval: 30s @@ -82,6 +65,7 @@ services: - dolphinscheduler-zookeeper volumes: - dolphinscheduler-logs:/opt/dolphinscheduler/logs + - dolphinscheduler-shared-local:/opt/soft - dolphinscheduler-resource-local:/dolphinscheduler restart: unless-stopped networks: @@ -93,29 +77,7 @@ services: command: alert-server environment: TZ: Asia/Shanghai - DOLPHINSCHEDULER_OPTS: "-Xms512m -Xmx512m -Xmn256m" - XLS_FILE_PATH: "/tmp/xls" - MAIL_SERVER_HOST: "" - MAIL_SERVER_PORT: "" - MAIL_SENDER: "" - MAIL_USER: "" - MAIL_PASSWD: "" - MAIL_SMTP_STARTTLS_ENABLE: "false" - MAIL_SMTP_SSL_ENABLE: "false" - MAIL_SMTP_SSL_TRUST: "" - ENTERPRISE_WECHAT_ENABLE: "false" - ENTERPRISE_WECHAT_CORP_ID: "" - ENTERPRISE_WECHAT_SECRET: "" - ENTERPRISE_WECHAT_AGENT_ID: "" - ENTERPRISE_WECHAT_USERS: "" - DATABASE_TYPE: postgresql - DATABASE_DRIVER: org.postgresql.Driver - DATABASE_HOST: dolphinscheduler-postgresql - DATABASE_PORT: 5432 - DATABASE_USERNAME: root - DATABASE_PASSWORD: root - DATABASE_DATABASE: dolphinscheduler - DATABASE_PARAMS: characterEncoding=utf8 + env_file: config.env.sh healthcheck: 
test: ["CMD", "/root/checkpoint.sh", "AlertServer"] interval: 30s @@ -133,29 +95,9 @@ services: image: apache/dolphinscheduler:latest container_name: dolphinscheduler-master command: master-server - ports: - - 5678:5678 environment: TZ: Asia/Shanghai - DOLPHINSCHEDULER_OPTS: "-Xms1g -Xmx1g -Xmn512m" - MASTER_EXEC_THREADS: "100" - MASTER_EXEC_TASK_NUM: "20" - MASTER_HEARTBEAT_INTERVAL: "10" - MASTER_TASK_COMMIT_RETRYTIMES: "5" - MASTER_TASK_COMMIT_INTERVAL: "1000" - MASTER_MAX_CPULOAD_AVG: "100" - MASTER_RESERVED_MEMORY: "0.1" - DATABASE_TYPE: postgresql - DATABASE_DRIVER: org.postgresql.Driver - DATABASE_HOST: dolphinscheduler-postgresql - DATABASE_PORT: 5432 - DATABASE_USERNAME: root - DATABASE_PASSWORD: root - DATABASE_DATABASE: dolphinscheduler - DATABASE_PARAMS: characterEncoding=utf8 - ZOOKEEPER_QUORUM: dolphinscheduler-zookeeper:2181 - HADOOP_HOME: "/opt/soft/hadoop" - HADOOP_CONF_DIR: "/opt/soft/hadoop/etc/hadoop" + env_file: config.env.sh healthcheck: test: ["CMD", "/root/checkpoint.sh", "MasterServer"] interval: 30s @@ -175,49 +117,9 @@ services: image: apache/dolphinscheduler:latest container_name: dolphinscheduler-worker command: worker-server - ports: - - 1234:1234 - - 50051:50051 environment: TZ: Asia/Shanghai - DOLPHINSCHEDULER_OPTS: "-Xms1g -Xmx1g -Xmn512m" - LOGGER_SERVER_OPTS: "-Xms512m -Xmx512m -Xmn256m" - WORKER_EXEC_THREADS: "100" - WORKER_HEARTBEAT_INTERVAL: "10" - WORKER_MAX_CPULOAD_AVG: "100" - WORKER_RESERVED_MEMORY: "0.1" - WORKER_GROUPS: "default" - XLS_FILE_PATH: "/tmp/xls" - MAIL_SERVER_HOST: "" - MAIL_SERVER_PORT: "" - MAIL_SENDER: "" - MAIL_USER: "" - MAIL_PASSWD: "" - MAIL_SMTP_STARTTLS_ENABLE: "false" - MAIL_SMTP_SSL_ENABLE: "false" - MAIL_SMTP_SSL_TRUST: "" - DATABASE_TYPE: postgresql - DATABASE_DRIVER: org.postgresql.Driver - DATABASE_HOST: dolphinscheduler-postgresql - DATABASE_PORT: 5432 - DATABASE_USERNAME: root - DATABASE_PASSWORD: root - DATABASE_DATABASE: dolphinscheduler - DATABASE_PARAMS: characterEncoding=utf8 - ZOOKEEPER_QUORUM: dolphinscheduler-zookeeper:2181 - DATA_BASEDIR_PATH: /tmp/dolphinscheduler - RESOURCE_STORAGE_TYPE: HDFS - RESOURCE_UPLOAD_PATH: /dolphinscheduler - FS_DEFAULT_FS: file:/// - HADOOP_HOME: "/opt/soft/hadoop" - HADOOP_CONF_DIR: "/opt/soft/hadoop/etc/hadoop" - SPARK_HOME1: "/opt/soft/spark1" - SPARK_HOME2: "/opt/soft/spark2" - PYTHON_HOME: "/usr/bin/python" - JAVA_HOME: "/usr/lib/jvm/java-1.8-openjdk" - HIVE_HOME: "/opt/soft/hive" - FLINK_HOME: "/opt/soft/flink" - DATAX_HOME: "/opt/soft/datax" + env_file: config.env.sh healthcheck: test: ["CMD", "/root/checkpoint.sh", "WorkerServer"] interval: 30s diff --git a/docker/docker-swarm/docker-stack.yml b/docker/docker-swarm/docker-stack.yml index 5490fdcb8e..a073fdc120 100644 --- a/docker/docker-swarm/docker-stack.yml +++ b/docker/docker-swarm/docker-stack.yml @@ -20,8 +20,6 @@ services: dolphinscheduler-postgresql: image: bitnami/postgresql:latest - ports: - - 5432:5432 environment: TZ: Asia/Shanghai POSTGRESQL_USERNAME: root @@ -37,8 +35,6 @@ services: dolphinscheduler-zookeeper: image: bitnami/zookeeper:latest - ports: - - 2181:2181 environment: TZ: Asia/Shanghai ALLOW_ANONYMOUS_LOGIN: "yes" @@ -58,20 +54,7 @@ services: - 12345:12345 environment: TZ: Asia/Shanghai - DOLPHINSCHEDULER_OPTS: "-Xms512m -Xmx512m -Xmn256m" - DATABASE_TYPE: postgresql - DATABASE_DRIVER: org.postgresql.Driver - DATABASE_HOST: dolphinscheduler-postgresql - DATABASE_PORT: 5432 - DATABASE_USERNAME: root - DATABASE_PASSWORD: root - DATABASE_DATABASE: dolphinscheduler - DATABASE_PARAMS: 
characterEncoding=utf8 - ZOOKEEPER_QUORUM: dolphinscheduler-zookeeper:2181 - DATA_BASEDIR_PATH: /tmp/dolphinscheduler - RESOURCE_STORAGE_TYPE: HDFS - RESOURCE_UPLOAD_PATH: /dolphinscheduler - FS_DEFAULT_FS: file:/// + env_file: config.env.sh healthcheck: test: ["CMD", "/root/checkpoint.sh", "ApiApplicationServer"] interval: 30s @@ -79,6 +62,8 @@ services: retries: 3 volumes: - dolphinscheduler-logs:/opt/dolphinscheduler/logs + - dolphinscheduler-shared-local:/opt/soft + - dolphinscheduler-resource-local:/dolphinscheduler networks: - dolphinscheduler deploy: @@ -90,29 +75,7 @@ services: command: alert-server environment: TZ: Asia/Shanghai - DOLPHINSCHEDULER_OPTS: "-Xms512m -Xmx512m -Xmn256m" - XLS_FILE_PATH: "/tmp/xls" - MAIL_SERVER_HOST: "" - MAIL_SERVER_PORT: "" - MAIL_SENDER: "" - MAIL_USER: "" - MAIL_PASSWD: "" - MAIL_SMTP_STARTTLS_ENABLE: "false" - MAIL_SMTP_SSL_ENABLE: "false" - MAIL_SMTP_SSL_TRUST: "" - ENTERPRISE_WECHAT_ENABLE: "false" - ENTERPRISE_WECHAT_CORP_ID: "" - ENTERPRISE_WECHAT_SECRET: "" - ENTERPRISE_WECHAT_AGENT_ID: "" - ENTERPRISE_WECHAT_USERS: "" - DATABASE_TYPE: postgresql - DATABASE_DRIVER: org.postgresql.Driver - DATABASE_HOST: dolphinscheduler-postgresql - DATABASE_PORT: 5432 - DATABASE_USERNAME: root - DATABASE_PASSWORD: root - DATABASE_DATABASE: dolphinscheduler - DATABASE_PARAMS: characterEncoding=utf8 + env_file: config.env.sh healthcheck: test: ["CMD", "/root/checkpoint.sh", "AlertServer"] interval: 30s @@ -129,29 +92,9 @@ services: dolphinscheduler-master: image: apache/dolphinscheduler:latest command: master-server - ports: - - 5678:5678 environment: TZ: Asia/Shanghai - DOLPHINSCHEDULER_OPTS: "-Xms1g -Xmx1g -Xmn512m" - MASTER_EXEC_THREADS: "100" - MASTER_EXEC_TASK_NUM: "20" - MASTER_HEARTBEAT_INTERVAL: "10" - MASTER_TASK_COMMIT_RETRYTIMES: "5" - MASTER_TASK_COMMIT_INTERVAL: "1000" - MASTER_MAX_CPULOAD_AVG: "100" - MASTER_RESERVED_MEMORY: "0.1" - DATABASE_TYPE: postgresql - DATABASE_DRIVER: org.postgresql.Driver - DATABASE_HOST: dolphinscheduler-postgresql - DATABASE_PORT: 5432 - DATABASE_USERNAME: root - DATABASE_PASSWORD: root - DATABASE_DATABASE: dolphinscheduler - DATABASE_PARAMS: characterEncoding=utf8 - ZOOKEEPER_QUORUM: dolphinscheduler-zookeeper:2181 - HADOOP_HOME: "/opt/soft/hadoop" - HADOOP_CONF_DIR: "/opt/soft/hadoop/etc/hadoop" + env_file: config.env.sh healthcheck: test: ["CMD", "/root/checkpoint.sh", "MasterServer"] interval: 30s @@ -159,6 +102,7 @@ services: retries: 3 volumes: - dolphinscheduler-logs:/opt/dolphinscheduler/logs + - dolphinscheduler-shared-local:/opt/soft networks: - dolphinscheduler deploy: @@ -168,49 +112,9 @@ services: dolphinscheduler-worker: image: apache/dolphinscheduler:latest command: worker-server - ports: - - 1234:1234 - - 50051:50051 environment: TZ: Asia/Shanghai - DOLPHINSCHEDULER_OPTS: "-Xms1g -Xmx1g -Xmn512m" - LOGGER_SERVER_OPTS: "-Xms512m -Xmx512m -Xmn256m" - WORKER_EXEC_THREADS: "100" - WORKER_HEARTBEAT_INTERVAL: "10" - WORKER_MAX_CPULOAD_AVG: "100" - WORKER_RESERVED_MEMORY: "0.1" - WORKER_GROUPS: "default" - XLS_FILE_PATH: "/tmp/xls" - MAIL_SERVER_HOST: "" - MAIL_SERVER_PORT: "" - MAIL_SENDER: "" - MAIL_USER: "" - MAIL_PASSWD: "" - MAIL_SMTP_STARTTLS_ENABLE: "false" - MAIL_SMTP_SSL_ENABLE: "false" - MAIL_SMTP_SSL_TRUST: "" - DATABASE_TYPE: postgresql - DATABASE_DRIVER: org.postgresql.Driver - DATABASE_HOST: dolphinscheduler-postgresql - DATABASE_PORT: 5432 - DATABASE_USERNAME: root - DATABASE_PASSWORD: root - DATABASE_DATABASE: dolphinscheduler - DATABASE_PARAMS: characterEncoding=utf8 - ZOOKEEPER_QUORUM: 
dolphinscheduler-zookeeper:2181 - DATA_BASEDIR_PATH: /tmp/dolphinscheduler - RESOURCE_STORAGE_TYPE: HDFS - RESOURCE_UPLOAD_PATH: /dolphinscheduler - FS_DEFAULT_FS: file:/// - HADOOP_HOME: "/opt/soft/hadoop" - HADOOP_CONF_DIR: "/opt/soft/hadoop/etc/hadoop" - SPARK_HOME1: "/opt/soft/spark1" - SPARK_HOME2: "/opt/soft/spark2" - PYTHON_HOME: "/usr/bin/python" - JAVA_HOME: "/usr/lib/jvm/java-1.8-openjdk" - HIVE_HOME: "/opt/soft/hive" - FLINK_HOME: "/opt/soft/flink" - DATAX_HOME: "/opt/soft/datax" + env_file: config.env.sh healthcheck: test: ["CMD", "/root/checkpoint.sh", "WorkerServer"] interval: 30s @@ -219,6 +123,8 @@ services: volumes: - dolphinscheduler-worker-data:/tmp/dolphinscheduler - dolphinscheduler-logs:/opt/dolphinscheduler/logs + - dolphinscheduler-shared-local:/opt/soft + - dolphinscheduler-resource-local:/dolphinscheduler networks: - dolphinscheduler deploy: @@ -233,4 +139,6 @@ volumes: dolphinscheduler-postgresql: dolphinscheduler-zookeeper: dolphinscheduler-worker-data: - dolphinscheduler-logs: \ No newline at end of file + dolphinscheduler-logs: + dolphinscheduler-shared-local: + dolphinscheduler-resource-local: \ No newline at end of file diff --git a/docker/kubernetes/dolphinscheduler/README.md b/docker/kubernetes/dolphinscheduler/README.md index 880340fb41..e558753464 100644 --- a/docker/kubernetes/dolphinscheduler/README.md +++ b/docker/kubernetes/dolphinscheduler/README.md @@ -41,7 +41,7 @@ If `ingress.enabled` in `values.yaml` is set to `true`, you just access `http:// > **Tip**: If there is a problem with ingress access, please contact the Kubernetes administrator and refer to the [Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) -Otherwise, you need to execute port-forward command like: +Otherwise, when `api.service.type=ClusterIP` you need to execute port-forward command like: ```bash $ kubectl port-forward --address 0.0.0.0 svc/dolphinscheduler-api 12345:12345 @@ -52,6 +52,16 @@ $ kubectl port-forward --address 0.0.0.0 -n test svc/dolphinscheduler-api 12345: And then access the web: http://192.168.xx.xx:12345/dolphinscheduler +Or when `api.service.type=NodePort` you need to execute the command: + +```bash +NODE_IP=$(kubectl get no -n {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") +NODE_PORT=$(kubectl get svc {{ template "dolphinscheduler.fullname" . }}-api -n {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}") +echo http://$NODE_IP:$NODE_PORT/dolphinscheduler +``` + +And then access the web: http://$NODE_IP:$NODE_PORT/dolphinscheduler + The default username is `admin` and the default password is `dolphinscheduler123` > **Tip**: For quick start in docker, you can create a tenant named `ds` and associate the user `admin` with the tenant `ds` @@ -74,6 +84,44 @@ $ kubectl delete pvc -l app.kubernetes.io/instance=dolphinscheduler > **Note**: Deleting the PVC's will delete all data as well. Please be cautious before doing it. 
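
If you are unsure which volumes a release owns, it may help to list them before deleting anything. A minimal sketch, assuming the release name `dolphinscheduler` used in the examples above (add `-n test` when the release lives in the `test` namespace):

```bash
# List the PersistentVolumeClaims created by this release before removing them
kubectl get pvc -l app.kubernetes.io/instance=dolphinscheduler

# Delete them only after confirming the list above is what you expect
kubectl delete pvc -l app.kubernetes.io/instance=dolphinscheduler
```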
+## Support Matrix + +| Type | Support | Notes | +| ------------------------------------------------------------ | ------------ | ------------------------------------- | +| Shell | Yes | | +| Python2 | Yes | | +| Python3 | Indirect Yes | Refer to FAQ | +| Hadoop2 | Indirect Yes | Refer to FAQ | +| Hadoop3 | Not Sure | Not tested | +| Spark-Local(client) | Indirect Yes | Refer to FAQ | +| Spark-YARN(cluster) | Indirect Yes | Refer to FAQ | +| Spark-Mesos(cluster) | Not Yet | | +| Spark-Standalone(cluster) | Not Yet | | +| Spark-Kubernetes(cluster) | Not Yet | | +| Flink-Local(local>=1.11) | Not Yet | Generic CLI mode is not yet supported | +| Flink-YARN(yarn-cluster) | Indirect Yes | Refer to FAQ | +| Flink-YARN(yarn-session/yarn-per-job/yarn-application>=1.11) | Not Yet | Generic CLI mode is not yet supported | +| Flink-Mesos(default) | Not Yet | | +| Flink-Mesos(remote>=1.11) | Not Yet | Generic CLI mode is not yet supported | +| Flink-Standalone(default) | Not Yet | | +| Flink-Standalone(remote>=1.11) | Not Yet | Generic CLI mode is not yet supported | +| Flink-Kubernetes(default) | Not Yet | | +| Flink-Kubernetes(remote>=1.11) | Not Yet | Generic CLI mode is not yet supported | +| Flink-NativeKubernetes(kubernetes-session/application>=1.11) | Not Yet | Generic CLI mode is not yet supported | +| MapReduce | Indirect Yes | Refer to FAQ | +| Kerberos | Indirect Yes | Refer to FAQ | +| HTTP | Yes | | +| DataX | Indirect Yes | Refer to FAQ | +| Sqoop | Indirect Yes | Refer to FAQ | +| SQL-MySQL | Indirect Yes | Refer to FAQ | +| SQL-PostgreSQL | Yes | | +| SQL-Hive | Indirect Yes | Refer to FAQ | +| SQL-Spark | Indirect Yes | Refer to FAQ | +| SQL-ClickHouse | Indirect Yes | Refer to FAQ | +| SQL-Oracle | Indirect Yes | Refer to FAQ | +| SQL-SQLServer | Indirect Yes | Refer to FAQ | +| SQL-DB2 | Indirect Yes | Refer to FAQ | + ## Configuration The configuration file is `values.yaml`, and the following tables lists the configurable parameters of the DolphinScheduler chart and their default values. @@ -105,7 +153,6 @@ The configuration file is `values.yaml`, and the following tables lists the conf | | | | | `zookeeper.enabled` | If not exists external Zookeeper, by default, the DolphinScheduler will use a internal Zookeeper | `true` | | `zookeeper.fourlwCommandsWhitelist` | A list of comma separated Four Letter Words commands to use | `srvr,ruok,wchs,cons` | -| `zookeeper.service.port` | ZooKeeper port | `2181` | | `zookeeper.persistence.enabled` | Set `zookeeper.persistence.enabled` to `true` to mount a new volume for internal Zookeeper | `false` | | `zookeeper.persistence.size` | `PersistentVolumeClaim` size | `20Gi` | | `zookeeper.persistence.storageClass` | Zookeeper data persistent volume storage class. If set to "-", storageClassName: "", which disables dynamic provisioning | `-` | @@ -113,6 +160,7 @@ The configuration file is `values.yaml`, and the following tables lists the conf | `externalZookeeper.zookeeperQuorum` | If exists external Zookeeper, and set `zookeeper.enabled` value to false. Specify Zookeeper quorum | `127.0.0.1:2181` | | `externalZookeeper.zookeeperRoot` | If exists external Zookeeper, and set `zookeeper.enabled` value to false. 
Specify dolphinscheduler root directory in Zookeeper | `/dolphinscheduler` | | | | | +| `common.configmap.DOLPHINSCHEDULER_OPTS` | The jvm options for dolphinscheduler, suitable for all servers | `""` | | `common.configmap.DATA_BASEDIR_PATH` | User data directory path, self configuration, please make sure the directory exists and have read write permissions | `/tmp/dolphinscheduler` | | `common.configmap.RESOURCE_STORAGE_TYPE` | Resource storage type: HDFS, S3, NONE | `HDFS` | | `common.configmap.RESOURCE_UPLOAD_PATH` | Resource store on HDFS/S3 path, please make sure the directory exists on hdfs and have read write permissions | `/dolphinscheduler` | @@ -120,7 +168,27 @@ The configuration file is `values.yaml`, and the following tables lists the conf | `common.configmap.FS_S3A_ENDPOINT` | S3 endpoint when `common.configmap.RESOURCE_STORAGE_TYPE` is set to `S3` | `s3.xxx.amazonaws.com` | | `common.configmap.FS_S3A_ACCESS_KEY` | S3 access key when `common.configmap.RESOURCE_STORAGE_TYPE` is set to `S3` | `xxxxxxx` | | `common.configmap.FS_S3A_SECRET_KEY` | S3 secret key when `common.configmap.RESOURCE_STORAGE_TYPE` is set to `S3` | `xxxxxxx` | -| `common.configmap.DOLPHINSCHEDULER_ENV` | System env path, self configuration, please read `values.yaml` | `[]` | +| `common.configmap.HADOOP_SECURITY_AUTHENTICATION_STARTUP_STATE` | Whether to startup kerberos | `false` | +| `common.configmap.JAVA_SECURITY_KRB5_CONF_PATH` | The java.security.krb5.conf path | `/opt/krb5.conf` | +| `common.configmap.LOGIN_USER_KEYTAB_USERNAME` | The login user from keytab username | `hdfs@HADOOP.COM` | +| `common.configmap.LOGIN_USER_KEYTAB_PATH` | The login user from keytab path | `/opt/hdfs.keytab` | +| `common.configmap.KERBEROS_EXPIRE_TIME` | The kerberos expire time, the unit is hour | `2` | +| `common.configmap.HDFS_ROOT_USER` | The HDFS root user who must have the permission to create directories under the HDFS root path | `hdfs` | +| `common.configmap.YARN_RESOURCEMANAGER_HA_RM_IDS` | If resourcemanager HA is enabled, please set the HA IPs | `nil` | +| `common.configmap.YARN_APPLICATION_STATUS_ADDRESS` | If resourcemanager is single, you only need to replace ds1 to actual resourcemanager hostname, otherwise keep default | `http://ds1:8088/ws/v1/cluster/apps/%s` | +| `common.configmap.SKYWALKING_ENABLE` | Set whether to enable skywalking | `false` | +| `common.configmap.SW_AGENT_COLLECTOR_BACKEND_SERVICES` | Set agent collector backend services for skywalking | `127.0.0.1:11800` | +| `common.configmap.SW_GRPC_LOG_SERVER_HOST` | Set grpc log server host for skywalking | `127.0.0.1` | +| `common.configmap.SW_GRPC_LOG_SERVER_PORT` | Set grpc log server port for skywalking | `11800` | +| `common.configmap.HADOOP_HOME` | Set `HADOOP_HOME` for DolphinScheduler's task environment | `/opt/soft/hadoop` | +| `common.configmap.HADOOP_CONF_DIR` | Set `HADOOP_CONF_DIR` for DolphinScheduler's task environment | `/opt/soft/hadoop/etc/hadoop` | +| `common.configmap.SPARK_HOME1` | Set `SPARK_HOME1` for DolphinScheduler's task environment | `/opt/soft/spark1` | +| `common.configmap.SPARK_HOME2` | Set `SPARK_HOME2` for DolphinScheduler's task environment | `/opt/soft/spark2` | +| `common.configmap.PYTHON_HOME` | Set `PYTHON_HOME` for DolphinScheduler's task environment | `/usr/bin/python` | +| `common.configmap.JAVA_HOME` | Set `JAVA_HOME` for DolphinScheduler's task environment | `/usr/local/openjdk-8` | +| `common.configmap.HIVE_HOME` | Set `HIVE_HOME` for DolphinScheduler's task environment | `/opt/soft/hive` | +| 
`common.configmap.FLINK_HOME` | Set `FLINK_HOME` for DolphinScheduler's task environment | `/opt/soft/flink` | +| `common.configmap.DATAX_HOME` | Set `DATAX_HOME` for DolphinScheduler's task environment | `/opt/soft/datax` | | `common.sharedStoragePersistence.enabled` | Set `common.sharedStoragePersistence.enabled` to `true` to mount a shared storage volume for Hadoop, Spark binary and etc | `false` | | `common.sharedStoragePersistence.mountPath` | The mount path for the shared storage volume | `/opt/soft` | | `common.sharedStoragePersistence.accessModes` | `PersistentVolumeClaim` access modes, must be `ReadWriteMany` | `[ReadWriteMany]` | @@ -138,15 +206,16 @@ The configuration file is `values.yaml`, and the following tables lists the conf | `master.nodeSelector` | NodeSelector is a selector which must be true for the pod to fit on a node | `{}` | | `master.tolerations` | If specified, the pod's tolerations | `{}` | | `master.resources` | The `resource` limit and request config for master server | `{}` | -| `master.configmap.DOLPHINSCHEDULER_OPTS` | The jvm options for master server | `""` | -| `master.configmap.MASTER_EXEC_THREADS` | Master execute thread num | `100` | +| `master.configmap.MASTER_SERVER_OPTS` | The jvm options for master server | `-Xms1g -Xmx1g -Xmn512m` | +| `master.configmap.MASTER_EXEC_THREADS` | Master execute thread number | `100` | | `master.configmap.MASTER_EXEC_TASK_NUM` | Master execute task number in parallel | `20` | +| `master.configmap.MASTER_DISPATCH_TASK_NUM` | Master dispatch task number | `3` | +| `master.configmap.MASTER_HOST_SELECTOR` | Master host selector to select a suitable worker, optional values include Random, RoundRobin, LowerWeight | `LowerWeight` | | `master.configmap.MASTER_HEARTBEAT_INTERVAL` | Master heartbeat interval | `10` | | `master.configmap.MASTER_TASK_COMMIT_RETRYTIMES` | Master commit task retry times | `5` | | `master.configmap.MASTER_TASK_COMMIT_INTERVAL` | Master commit task interval | `1000` | -| `master.configmap.MASTER_MAX_CPULOAD_AVG` | Only less than cpu avg load, master server can work. default value : the number of cpu cores * 2 | `100` | -| `master.configmap.MASTER_RESERVED_MEMORY` | Only larger than reserved memory, master server can work. default value : physical memory * 1/10, unit is G | `0.1` | -| `master.configmap.MASTER_LISTEN_PORT` | Master listen port | `5678` | +| `master.configmap.MASTER_MAX_CPULOAD_AVG` | Only less than cpu avg load, master server can work. default value : the number of cpu cores * 2 | `-1` | +| `master.configmap.MASTER_RESERVED_MEMORY` | Only larger than reserved memory, master server can work. 
default value : physical memory * 1/10, unit is G | `0.3` | | `master.livenessProbe.enabled` | Turn on and off liveness probe | `true` | | `master.livenessProbe.initialDelaySeconds` | Delay before liveness probe is initiated | `30` | | `master.livenessProbe.periodSeconds` | How often to perform the probe | `30` | @@ -171,13 +240,12 @@ The configuration file is `values.yaml`, and the following tables lists the conf | `worker.nodeSelector` | NodeSelector is a selector which must be true for the pod to fit on a node | `{}` | | `worker.tolerations` | If specified, the pod's tolerations | `{}` | | `worker.resources` | The `resource` limit and request config for worker server | `{}` | -| `worker.configmap.DOLPHINSCHEDULER_OPTS` | The jvm options for worker server | `""` | -| `worker.configmap.LOGGER_SERVER_OPTS` | The jvm options for logger server (since `logger-server` is deployed with `worker-server`, it needs to be set separately) | `""` | -| `worker.configmap.WORKER_EXEC_THREADS` | Worker execute thread num | `100` | +| `worker.configmap.LOGGER_SERVER_OPTS` | The jvm options for logger server | `-Xms512m -Xmx512m -Xmn256m` | +| `worker.configmap.WORKER_SERVER_OPTS` | The jvm options for worker server | `-Xms1g -Xmx1g -Xmn512m` | +| `worker.configmap.WORKER_EXEC_THREADS` | Worker execute thread number | `100` | | `worker.configmap.WORKER_HEARTBEAT_INTERVAL` | Worker heartbeat interval | `10` | -| `worker.configmap.WORKER_MAX_CPULOAD_AVG` | Only less than cpu avg load, worker server can work. default value : the number of cpu cores * 2 | `100` | -| `worker.configmap.WORKER_RESERVED_MEMORY` | Only larger than reserved memory, worker server can work. default value : physical memory * 1/10, unit is G | `0.1` | -| `worker.configmap.WORKER_LISTEN_PORT` | Worker listen port | `1234` | +| `worker.configmap.WORKER_MAX_CPULOAD_AVG` | Only less than cpu avg load, worker server can work. default value : the number of cpu cores * 2 | `-1` | +| `worker.configmap.WORKER_RESERVED_MEMORY` | Only larger than reserved memory, worker server can work. 
default value : physical memory * 1/10, unit is G | `0.3` | | `worker.configmap.WORKER_GROUPS` | Worker groups | `default` | | `worker.livenessProbe.enabled` | Turn on and off liveness probe | `true` | | `worker.livenessProbe.initialDelaySeconds` | Delay before liveness probe is initiated | `30` | @@ -210,7 +278,7 @@ The configuration file is `values.yaml`, and the following tables lists the conf | `alert.nodeSelector` | NodeSelector is a selector which must be true for the pod to fit on a node | `{}` | | `alert.tolerations` | If specified, the pod's tolerations | `{}` | | `alert.resources` | The `resource` limit and request config for alert server | `{}` | -| `alert.configmap.DOLPHINSCHEDULER_OPTS` | The jvm options for alert server | `""` | +| `alert.configmap.ALERT_SERVER_OPTS` | The jvm options for alert server | `-Xms512m -Xmx512m -Xmn256m` | | `alert.configmap.XLS_FILE_PATH` | XLS file path | `/tmp/xls` | | `alert.configmap.MAIL_SERVER_HOST` | Mail `SERVER HOST ` | `nil` | | `alert.configmap.MAIL_SERVER_PORT` | Mail `SERVER PORT` | `nil` | @@ -251,7 +319,7 @@ The configuration file is `values.yaml`, and the following tables lists the conf | `api.nodeSelector` | NodeSelector is a selector which must be true for the pod to fit on a node | `{}` | | `api.tolerations` | If specified, the pod's tolerations | `{}` | | `api.resources` | The `resource` limit and request config for api server | `{}` | -| `api.configmap.DOLPHINSCHEDULER_OPTS` | The jvm options for api server | `""` | +| `api.configmap.API_SERVER_OPTS` | The jvm options for api server | `-Xms512m -Xmx512m -Xmn256m` | | `api.livenessProbe.enabled` | Turn on and off liveness probe | `true` | | `api.livenessProbe.initialDelaySeconds` | Delay before liveness probe is initiated | `30` | | `api.livenessProbe.periodSeconds` | How often to perform the probe | `30` | @@ -268,6 +336,13 @@ The configuration file is `values.yaml`, and the following tables lists the conf | `api.persistentVolumeClaim.accessModes` | `PersistentVolumeClaim` access modes | `[ReadWriteOnce]` | | `api.persistentVolumeClaim.storageClassName` | `api` logs data persistent volume storage class. If set to "-", storageClassName: "", which disables dynamic provisioning | `-` | | `api.persistentVolumeClaim.storage` | `PersistentVolumeClaim` size | `20Gi` | +| `api.service.type` | `type` determines how the Service is exposed. Valid options are ExternalName, ClusterIP, NodePort, and LoadBalancer | `ClusterIP` | +| `api.service.clusterIP` | `clusterIP` is the IP address of the service and is usually assigned randomly by the master | `nil` | +| `api.service.nodePort` | `nodePort` is the port on each node on which this service is exposed when type=NodePort | `nil` | +| `api.service.externalIPs` | `externalIPs` is a list of IP addresses for which nodes in the cluster will also accept traffic for this service | `[]` | +| `api.service.externalName` | `externalName` is the external reference that kubedns or equivalent will return as a CNAME record for this service | `nil` | +| `api.service.loadBalancerIP` | `loadBalancerIP` when service.type is LoadBalancer. 
LoadBalancer will get created with the IP specified in this field | `nil` | +| `api.service.annotations` | `annotations` may need to be set when service.type is LoadBalancer | `{}` | | | | | | `ingress.enabled` | Enable ingress | `false` | | `ingress.host` | Ingress host | `dolphinscheduler.org` | @@ -279,29 +354,28 @@ The configuration file is `values.yaml`, and the following tables lists the conf ### How to use MySQL as the DolphinScheduler's database instead of PostgreSQL? -> Because of the commercial license, we cannot directly use the driver and client of MySQL. +> Because of the commercial license, we cannot directly use the driver of MySQL. > > If you want to use MySQL, you can build a new image based on the `apache/dolphinscheduler` image as follows. 1. Download the MySQL driver [mysql-connector-java-5.1.49.jar](https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.49/mysql-connector-java-5.1.49.jar) (require `>=5.1.47`) -2. Create a new `Dockerfile` to add MySQL driver and client: +2. Create a new `Dockerfile` to add MySQL driver: ``` FROM apache/dolphinscheduler:latest COPY mysql-connector-java-5.1.49.jar /opt/dolphinscheduler/lib -RUN apk add --update --no-cache mysql-client ``` -3. Build a new docker image including MySQL driver and client: +3. Build a new docker image including MySQL driver: ``` -docker build -t apache/dolphinscheduler:mysql . +docker build -t apache/dolphinscheduler:mysql-driver . ``` -4. Push the docker image `apache/dolphinscheduler:mysql` to a docker registry +4. Push the docker image `apache/dolphinscheduler:mysql-driver` to a docker registry -5. Modify image `repository` and update `tag` to `mysql` in `values.yaml` +5. Modify image `repository` and update `tag` to `mysql-driver` in `values.yaml` 6. Modify postgresql `enabled` to `false` @@ -379,4 +453,129 @@ docker build -t apache/dolphinscheduler:oracle-driver . 7. Add a Oracle datasource in `Datasource manage` +### How to support Python 2 pip and custom requirements.txt? + +1. Create a new `Dockerfile` to install pip: + +``` +FROM apache/dolphinscheduler:latest +COPY requirements.txt /tmp +RUN apt-get update && \ + apt-get install -y --no-install-recommends python-pip && \ + pip install --no-cache-dir -r /tmp/requirements.txt && \ + rm -rf /var/lib/apt/lists/* +``` + +The command will install the default **pip 18.1**. If you upgrade the pip, just add one line + +``` + pip install --no-cache-dir -U pip && \ +``` + +2. Build a new docker image including pip: + +``` +docker build -t apache/dolphinscheduler:pip . +``` + +3. Push the docker image `apache/dolphinscheduler:pip` to a docker registry + +4. Modify image `repository` and update `tag` to `pip` in `values.yaml` + +5. Run a DolphinScheduler release in Kubernetes (See **Installing the Chart**) + +6. Verify pip under a new Python task + +### How to support Python 3? + +1. Create a new `Dockerfile` to install Python 3: + +``` +FROM apache/dolphinscheduler:latest +RUN apt-get update && \ + apt-get install -y --no-install-recommends python3 && \ + rm -rf /var/lib/apt/lists/* +``` + +The command will install the default **Python 3.7.3**. If you also want to install **pip3**, just replace `python3` with `python3-pip` like + +``` + apt-get install -y --no-install-recommends python3-pip && \ +``` + +2. Build a new docker image including Python 3: + +``` +docker build -t apache/dolphinscheduler:python3 . +``` + +3. Push the docker image `apache/dolphinscheduler:python3` to a docker registry + +4. 
Modify image `repository` and update `tag` to `python3` in `values.yaml` + +5. Modify `PYTHON_HOME` to `/usr/bin/python3` in `values.yaml` + +6. Run a DolphinScheduler release in Kubernetes (See **Installing the Chart**) + +7. Verify Python 3 under a new Python task + +### How to support Hadoop, Spark, Flink, Hive or DataX? + +Take Spark 2.4.7 as an example: + +1. Download the Spark 2.4.7 release binary `spark-2.4.7-bin-hadoop2.7.tgz` + +2. Ensure that `common.sharedStoragePersistence.enabled` is turned on + +3. Run a DolphinScheduler release in Kubernetes (See **Installing the Chart**) + +4. Copy the Spark 2.4.7 release binary into Docker container + +```bash +kubectl cp spark-2.4.7-bin-hadoop2.7.tgz dolphinscheduler-worker-0:/opt/soft +kubectl cp -n test spark-2.4.7-bin-hadoop2.7.tgz dolphinscheduler-worker-0:/opt/soft # with test namespace +``` + +Because the volume `sharedStoragePersistence` is mounted on `/opt/soft`, all files in `/opt/soft` will not be lost + +5. Attach the container and ensure that `SPARK_HOME2` exists + +```bash +kubectl exec -it dolphinscheduler-worker-0 bash +kubectl exec -n test -it dolphinscheduler-worker-0 bash # with test namespace +cd /opt/soft +tar zxf spark-2.4.7-bin-hadoop2.7.tgz +rm -f spark-2.4.7-bin-hadoop2.7.tgz +ln -s spark-2.4.7-bin-hadoop2.7 spark2 # or just mv +$SPARK_HOME2/bin/spark-submit --version +``` + +The last command will print Spark version if everything goes well + +6. Verify Spark under a Shell task + +``` +$SPARK_HOME2/bin/spark-submit --class org.apache.spark.examples.SparkPi $SPARK_HOME2/examples/jars/spark-examples_2.11-2.4.7.jar +``` + +Check whether the task log contains the output like `Pi is roughly 3.146015` + +7. Verify Spark under a Spark task + +The file `spark-examples_2.11-2.4.7.jar` needs to be uploaded to the resources first, and then create a Spark task with: + +- Spark Version: `SPARK2` +- Main Class: `org.apache.spark.examples.SparkPi` +- Main Package: `spark-examples_2.11-2.4.7.jar` +- Deploy Mode: `local` + +Similarly, check whether the task log contains the output like `Pi is roughly 3.146015` + +8. Verify Spark on YARN + +Spark on YARN (Deploy Mode is `cluster` or `client`) requires Hadoop support. Similar to Spark support, the operation of supporting Hadoop is almost the same as the previous steps + +Ensure that `$HADOOP_HOME` and `$HADOOP_CONF_DIR` exists + For more information please refer to the [incubator-dolphinscheduler](https://github.com/apache/incubator-dolphinscheduler.git) documentation. + diff --git a/docker/kubernetes/dolphinscheduler/templates/NOTES.txt b/docker/kubernetes/dolphinscheduler/templates/NOTES.txt index 81a30d5677..ac15c07b53 100644 --- a/docker/kubernetes/dolphinscheduler/templates/NOTES.txt +++ b/docker/kubernetes/dolphinscheduler/templates/NOTES.txt @@ -17,16 +17,34 @@ ** Please be patient while the chart DolphinScheduler {{ .Chart.AppVersion }} is being deployed ** -Access DolphinScheduler by: +Access DolphinScheduler UI URL by: {{- if .Values.ingress.enabled }} - DolphinScheduler URL: http://{{ .Values.ingress.host }}/dolphinscheduler + DolphinScheduler UI URL: http{{ if .Values.ingress.tls.enabled }}s{{ end }}://{{ .Values.ingress.host }}/dolphinscheduler -{{- else }} +{{- else if eq .Values.api.service.type "ClusterIP" }} - kubectl port-forward --namespace {{ .Release.Namespace }} svc/{{ template "dolphinscheduler.fullname" . }}-api 12345:12345 + kubectl port-forward -n {{ .Release.Namespace }} svc/{{ template "dolphinscheduler.fullname" . 
}}-api 12345:12345 - DolphinScheduler URL: http://127.0.0.1:12345/dolphinscheduler + DolphinScheduler UI URL: http://127.0.0.1:12345/dolphinscheduler + +{{- else if eq .Values.api.service.type "NodePort" }} + + NODE_IP=$(kubectl get no -n {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + NODE_PORT=$(kubectl get svc {{ template "dolphinscheduler.fullname" . }}-api -n {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}") + echo http://$NODE_IP:$NODE_PORT/dolphinscheduler + + DolphinScheduler UI URL: http://$NODE_IP:$NODE_PORT/dolphinscheduler + +{{- else if eq .Values.api.service.type "LoadBalancer" }} + + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of by running 'kubectl get svc {{ template "dolphinscheduler.fullname" . }}-api -n {{ .Release.Namespace }} -w' + + SERVICE_IP=$(kubectl get svc {{ template "dolphinscheduler.fullname" . }}-api -n {{ .Release.Namespace }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + echo http://$SERVICE_IP:12345/dolphinscheduler + + DolphinScheduler UI URL: http://$SERVICE_IP:12345/dolphinscheduler {{- end }} \ No newline at end of file diff --git a/docker/kubernetes/dolphinscheduler/templates/_helpers.tpl b/docker/kubernetes/dolphinscheduler/templates/_helpers.tpl index 21ee92e067..3f6a0f53fc 100644 --- a/docker/kubernetes/dolphinscheduler/templates/_helpers.tpl +++ b/docker/kubernetes/dolphinscheduler/templates/_helpers.tpl @@ -33,6 +33,51 @@ Create a default docker image fullname. {{- printf "%s:%s" .Values.image.repository .Values.image.tag -}} {{- end -}} +{{/* +Create a default common labels. +*/}} +{{- define "dolphinscheduler.common.labels" -}} +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/version: {{ .Chart.AppVersion }} +{{- end -}} + +{{/* +Create a master labels. +*/}} +{{- define "dolphinscheduler.master.labels" -}} +app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-master +app.kubernetes.io/component: master +{{ include "dolphinscheduler.common.labels" . }} +{{- end -}} + +{{/* +Create a worker labels. +*/}} +{{- define "dolphinscheduler.worker.labels" -}} +app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-worker +app.kubernetes.io/component: worker +{{ include "dolphinscheduler.common.labels" . }} +{{- end -}} + +{{/* +Create an alert labels. +*/}} +{{- define "dolphinscheduler.alert.labels" -}} +app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-alert +app.kubernetes.io/component: alert +{{ include "dolphinscheduler.common.labels" . }} +{{- end -}} + +{{/* +Create an api labels. +*/}} +{{- define "dolphinscheduler.api.labels" -}} +app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-api +app.kubernetes.io/component: api +{{ include "dolphinscheduler.common.labels" . }} +{{- end -}} + {{/* Create a default fully qualified postgresql name. We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). @@ -57,4 +102,135 @@ Create a default fully qualified zookkeeper quorum. {{- define "dolphinscheduler.zookeeper.quorum" -}} {{- $port := default "2181" (.Values.zookeeper.service.port | toString) -}} {{- printf "%s:%s" (include "dolphinscheduler.zookeeper.fullname" .) $port -}} +{{- end -}} + +{{/* +Create a database environment variables. 
+*/}} +{{- define "dolphinscheduler.database.env_vars" -}} +- name: DATABASE_TYPE + {{- if .Values.postgresql.enabled }} + value: "postgresql" + {{- else }} + value: {{ .Values.externalDatabase.type | quote }} + {{- end }} +- name: DATABASE_DRIVER + {{- if .Values.postgresql.enabled }} + value: "org.postgresql.Driver" + {{- else }} + value: {{ .Values.externalDatabase.driver | quote }} + {{- end }} +- name: DATABASE_HOST + {{- if .Values.postgresql.enabled }} + value: {{ template "dolphinscheduler.postgresql.fullname" . }} + {{- else }} + value: {{ .Values.externalDatabase.host | quote }} + {{- end }} +- name: DATABASE_PORT + {{- if .Values.postgresql.enabled }} + value: "5432" + {{- else }} + value: {{ .Values.externalDatabase.port | quote }} + {{- end }} +- name: DATABASE_USERNAME + {{- if .Values.postgresql.enabled }} + value: {{ .Values.postgresql.postgresqlUsername }} + {{- else }} + value: {{ .Values.externalDatabase.username | quote }} + {{- end }} +- name: DATABASE_PASSWORD + valueFrom: + secretKeyRef: + {{- if .Values.postgresql.enabled }} + name: {{ template "dolphinscheduler.postgresql.fullname" . }} + key: postgresql-password + {{- else }} + name: {{ include "dolphinscheduler.fullname" . }}-externaldb + key: database-password + {{- end }} +- name: DATABASE_DATABASE + {{- if .Values.postgresql.enabled }} + value: {{ .Values.postgresql.postgresqlDatabase }} + {{- else }} + value: {{ .Values.externalDatabase.database | quote }} + {{- end }} +- name: DATABASE_PARAMS + {{- if .Values.postgresql.enabled }} + value: "characterEncoding=utf8" + {{- else }} + value: {{ .Values.externalDatabase.params | quote }} + {{- end }} +{{- end -}} + +{{/* +Create a zookeeper environment variables. +*/}} +{{- define "dolphinscheduler.zookeeper.env_vars" -}} +- name: ZOOKEEPER_QUORUM + {{- if .Values.zookeeper.enabled }} + value: {{ template "dolphinscheduler.zookeeper.quorum" . }} + {{- else }} + value: {{ .Values.externalZookeeper.zookeeperQuorum }} + {{- end }} +- name: ZOOKEEPER_ROOT + {{- if .Values.zookeeper.enabled }} + value: {{ .Values.zookeeper.zookeeperRoot }} + {{- else }} + value: {{ .Values.externalZookeeper.zookeeperRoot }} + {{- end }} +{{- end -}} + +{{/* +Create a common fs_s3a environment variables. +*/}} +{{- define "dolphinscheduler.fs_s3a.env_vars" -}} +{{- if eq .Values.common.configmap.RESOURCE_STORAGE_TYPE "S3" -}} +- name: FS_S3A_SECRET_KEY + valueFrom: + secretKeyRef: + key: fs-s3a-secret-key + name: {{ include "dolphinscheduler.fullname" . }}-fs-s3a +{{- end -}} +{{- end -}} + +{{/* +Create a sharedStoragePersistence volume. +*/}} +{{- define "dolphinscheduler.sharedStorage.volume" -}} +{{- if .Values.common.sharedStoragePersistence.enabled -}} +- name: {{ include "dolphinscheduler.fullname" . }}-shared + persistentVolumeClaim: + claimName: {{ include "dolphinscheduler.fullname" . }}-shared +{{- end -}} +{{- end -}} + +{{/* +Create a sharedStoragePersistence volumeMount. +*/}} +{{- define "dolphinscheduler.sharedStorage.volumeMount" -}} +{{- if .Values.common.sharedStoragePersistence.enabled -}} +- mountPath: {{ .Values.common.sharedStoragePersistence.mountPath | quote }} + name: {{ include "dolphinscheduler.fullname" . }}-shared +{{- end -}} +{{- end -}} + +{{/* +Create a fsFileResourcePersistence volume. +*/}} +{{- define "dolphinscheduler.fsFileResource.volume" -}} +{{- if .Values.common.fsFileResourcePersistence.enabled -}} +- name: {{ include "dolphinscheduler.fullname" . }}-fs-file + persistentVolumeClaim: + claimName: {{ include "dolphinscheduler.fullname" . 
}}-fs-file +{{- end -}} +{{- end -}} + +{{/* +Create a fsFileResourcePersistence volumeMount. +*/}} +{{- define "dolphinscheduler.fsFileResource.volumeMount" -}} +{{- if .Values.common.fsFileResourcePersistence.enabled -}} +- mountPath: {{ .Values.common.configmap.RESOURCE_UPLOAD_PATH | quote }} + name: {{ include "dolphinscheduler.fullname" . }}-fs-file +{{- end -}} {{- end -}} \ No newline at end of file diff --git a/docker/kubernetes/dolphinscheduler/templates/configmap-dolphinscheduler-alert.yaml b/docker/kubernetes/dolphinscheduler/templates/configmap-dolphinscheduler-alert.yaml index 9b49b7b512..9ef96fbddc 100644 --- a/docker/kubernetes/dolphinscheduler/templates/configmap-dolphinscheduler-alert.yaml +++ b/docker/kubernetes/dolphinscheduler/templates/configmap-dolphinscheduler-alert.yaml @@ -21,22 +21,9 @@ metadata: name: {{ include "dolphinscheduler.fullname" . }}-alert labels: app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-alert - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} + {{- include "dolphinscheduler.common.labels" . | nindent 4 }} data: - DOLPHINSCHEDULER_OPTS: {{ .Values.alert.configmap.DOLPHINSCHEDULER_OPTS | quote }} - XLS_FILE_PATH: {{ .Values.alert.configmap.XLS_FILE_PATH | quote }} - MAIL_SERVER_HOST: {{ .Values.alert.configmap.MAIL_SERVER_HOST | quote }} - MAIL_SERVER_PORT: {{ .Values.alert.configmap.MAIL_SERVER_PORT | quote }} - MAIL_SENDER: {{ .Values.alert.configmap.MAIL_SENDER | quote }} - MAIL_USER: {{ .Values.alert.configmap.MAIL_USER | quote }} - MAIL_PASSWD: {{ .Values.alert.configmap.MAIL_PASSWD | quote }} - MAIL_SMTP_STARTTLS_ENABLE: {{ .Values.alert.configmap.MAIL_SMTP_STARTTLS_ENABLE | quote }} - MAIL_SMTP_SSL_ENABLE: {{ .Values.alert.configmap.MAIL_SMTP_SSL_ENABLE | quote }} - MAIL_SMTP_SSL_TRUST: {{ .Values.alert.configmap.MAIL_SMTP_SSL_TRUST | quote }} - ENTERPRISE_WECHAT_ENABLE: {{ .Values.alert.configmap.ENTERPRISE_WECHAT_ENABLE | quote }} - ENTERPRISE_WECHAT_CORP_ID: {{ .Values.alert.configmap.ENTERPRISE_WECHAT_CORP_ID | quote }} - ENTERPRISE_WECHAT_SECRET: {{ .Values.alert.configmap.ENTERPRISE_WECHAT_SECRET | quote }} - ENTERPRISE_WECHAT_AGENT_ID: {{ .Values.alert.configmap.ENTERPRISE_WECHAT_AGENT_ID | quote }} - ENTERPRISE_WECHAT_USERS: {{ .Values.alert.configmap.ENTERPRISE_WECHAT_USERS | quote }} + {{- range $key, $value := .Values.alert.configmap }} + {{ $key }}: {{ $value | quote }} + {{- end }} {{- end }} \ No newline at end of file diff --git a/docker/kubernetes/dolphinscheduler/templates/configmap-dolphinscheduler-api.yaml b/docker/kubernetes/dolphinscheduler/templates/configmap-dolphinscheduler-api.yaml index 3845f1f67c..70b818fc4c 100644 --- a/docker/kubernetes/dolphinscheduler/templates/configmap-dolphinscheduler-api.yaml +++ b/docker/kubernetes/dolphinscheduler/templates/configmap-dolphinscheduler-api.yaml @@ -21,8 +21,9 @@ metadata: name: {{ include "dolphinscheduler.fullname" . }}-api labels: app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-api - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} + {{- include "dolphinscheduler.common.labels" . 
| nindent 4 }} data: - DOLPHINSCHEDULER_OPTS: {{ .Values.api.configmap.DOLPHINSCHEDULER_OPTS | quote }} + {{- range $key, $value := .Values.api.configmap }} + {{ $key }}: {{ $value | quote }} + {{- end }} {{- end }} \ No newline at end of file diff --git a/docker/kubernetes/dolphinscheduler/templates/configmap-dolphinscheduler-common.yaml b/docker/kubernetes/dolphinscheduler/templates/configmap-dolphinscheduler-common.yaml index 737a31a646..aa8042b0f1 100644 --- a/docker/kubernetes/dolphinscheduler/templates/configmap-dolphinscheduler-common.yaml +++ b/docker/kubernetes/dolphinscheduler/templates/configmap-dolphinscheduler-common.yaml @@ -21,17 +21,9 @@ metadata: name: {{ include "dolphinscheduler.fullname" . }}-common labels: app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-common - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} + {{- include "dolphinscheduler.common.labels" . | nindent 4 }} data: - DATA_BASEDIR_PATH: {{ .Values.common.configmap.DATA_BASEDIR_PATH | quote }} - RESOURCE_STORAGE_TYPE: {{ .Values.common.configmap.RESOURCE_STORAGE_TYPE | quote }} - RESOURCE_UPLOAD_PATH: {{ .Values.common.configmap.RESOURCE_UPLOAD_PATH | quote }} - FS_DEFAULT_FS: {{ .Values.common.configmap.FS_DEFAULT_FS | quote }} - FS_S3A_ENDPOINT: {{ .Values.common.configmap.FS_S3A_ENDPOINT | quote }} - FS_S3A_ACCESS_KEY: {{ .Values.common.configmap.FS_S3A_ACCESS_KEY | quote }} - DOLPHINSCHEDULER_ENV: |- - {{- range .Values.common.configmap.DOLPHINSCHEDULER_ENV }} - {{ . }} + {{- range $key, $value := (omit .Values.common.configmap "FS_S3A_SECRET_KEY") }} + {{ $key }}: {{ $value | quote }} {{- end }} {{- end }} \ No newline at end of file diff --git a/docker/kubernetes/dolphinscheduler/templates/configmap-dolphinscheduler-master.yaml b/docker/kubernetes/dolphinscheduler/templates/configmap-dolphinscheduler-master.yaml index 7c1be7717e..34823f66a7 100644 --- a/docker/kubernetes/dolphinscheduler/templates/configmap-dolphinscheduler-master.yaml +++ b/docker/kubernetes/dolphinscheduler/templates/configmap-dolphinscheduler-master.yaml @@ -21,16 +21,9 @@ metadata: name: {{ include "dolphinscheduler.fullname" . }}-master labels: app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-master - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} + {{- include "dolphinscheduler.common.labels" . 
| nindent 4 }} data: - DOLPHINSCHEDULER_OPTS: {{ .Values.master.configmap.DOLPHINSCHEDULER_OPTS | quote }} - MASTER_EXEC_THREADS: {{ .Values.master.configmap.MASTER_EXEC_THREADS | quote }} - MASTER_EXEC_TASK_NUM: {{ .Values.master.configmap.MASTER_EXEC_TASK_NUM | quote }} - MASTER_HEARTBEAT_INTERVAL: {{ .Values.master.configmap.MASTER_HEARTBEAT_INTERVAL | quote }} - MASTER_TASK_COMMIT_RETRYTIMES: {{ .Values.master.configmap.MASTER_TASK_COMMIT_RETRYTIMES | quote }} - MASTER_TASK_COMMIT_INTERVAL: {{ .Values.master.configmap.MASTER_TASK_COMMIT_INTERVAL | quote }} - MASTER_MAX_CPULOAD_AVG: {{ .Values.master.configmap.MASTER_MAX_CPULOAD_AVG | quote }} - MASTER_RESERVED_MEMORY: {{ .Values.master.configmap.MASTER_RESERVED_MEMORY | quote }} - MASTER_LISTEN_PORT: {{ .Values.master.configmap.MASTER_LISTEN_PORT | quote }} + {{- range $key, $value := .Values.master.configmap }} + {{ $key }}: {{ $value | quote }} + {{- end }} {{- end }} \ No newline at end of file diff --git a/docker/kubernetes/dolphinscheduler/templates/configmap-dolphinscheduler-worker.yaml b/docker/kubernetes/dolphinscheduler/templates/configmap-dolphinscheduler-worker.yaml index 262d97ef8a..68a292d61f 100644 --- a/docker/kubernetes/dolphinscheduler/templates/configmap-dolphinscheduler-worker.yaml +++ b/docker/kubernetes/dolphinscheduler/templates/configmap-dolphinscheduler-worker.yaml @@ -21,15 +21,9 @@ metadata: name: {{ include "dolphinscheduler.fullname" . }}-worker labels: app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-worker - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} + {{- include "dolphinscheduler.common.labels" . | nindent 4 }} data: - DOLPHINSCHEDULER_OPTS: {{ .Values.worker.configmap.DOLPHINSCHEDULER_OPTS | quote }} - LOGGER_SERVER_OPTS: {{ .Values.worker.configmap.LOGGER_SERVER_OPTS | quote }} - WORKER_EXEC_THREADS: {{ .Values.worker.configmap.WORKER_EXEC_THREADS | quote }} - WORKER_HEARTBEAT_INTERVAL: {{ .Values.worker.configmap.WORKER_HEARTBEAT_INTERVAL | quote }} - WORKER_MAX_CPULOAD_AVG: {{ .Values.worker.configmap.WORKER_MAX_CPULOAD_AVG | quote }} - WORKER_RESERVED_MEMORY: {{ .Values.worker.configmap.WORKER_RESERVED_MEMORY | quote }} - WORKER_LISTEN_PORT: {{ .Values.worker.configmap.WORKER_LISTEN_PORT | quote }} - WORKER_GROUPS: {{ .Values.worker.configmap.WORKER_GROUPS | quote }} + {{- range $key, $value := .Values.worker.configmap }} + {{ $key }}: {{ $value | quote }} + {{- end }} {{- end }} \ No newline at end of file diff --git a/docker/kubernetes/dolphinscheduler/templates/deployment-dolphinscheduler-alert.yaml b/docker/kubernetes/dolphinscheduler/templates/deployment-dolphinscheduler-alert.yaml index 5ab1fc62b3..9c7577bae9 100644 --- a/docker/kubernetes/dolphinscheduler/templates/deployment-dolphinscheduler-alert.yaml +++ b/docker/kubernetes/dolphinscheduler/templates/deployment-dolphinscheduler-alert.yaml @@ -19,18 +19,12 @@ kind: Deployment metadata: name: {{ include "dolphinscheduler.fullname" . }}-alert labels: - app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-alert - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} - app.kubernetes.io/component: alert + {{- include "dolphinscheduler.alert.labels" . | nindent 4 }} spec: replicas: {{ .Values.alert.replicas }} selector: matchLabels: - app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . 
}}-alert - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} - app.kubernetes.io/component: alert + {{- include "dolphinscheduler.alert.labels" . | nindent 6 }} strategy: type: {{ .Values.alert.strategy.type | quote }} rollingUpdate: @@ -39,23 +33,23 @@ spec: template: metadata: labels: - app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-alert - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} - app.kubernetes.io/component: alert + {{- include "dolphinscheduler.alert.labels" . | nindent 8 }} {{- if .Values.alert.annotations }} annotations: {{- toYaml .Values.alert.annotations | nindent 8 }} {{- end }} spec: {{- if .Values.alert.affinity }} - affinity: {{- toYaml .Values.alert.affinity | nindent 8 }} + affinity: + {{- toYaml .Values.alert.affinity | nindent 8 }} {{- end }} {{- if .Values.alert.nodeSelector }} - nodeSelector: {{- toYaml .Values.alert.nodeSelector | nindent 8 }} + nodeSelector: + {{- toYaml .Values.alert.nodeSelector | nindent 8 }} {{- end }} {{- if .Values.alert.tolerations }} - tolerations: {{- toYaml . | nindent 8 }} + tolerations: + {{- toYaml .Values.alert.tolerations | nindent 8 }} {{- end }} {{- if .Values.image.pullSecret }} imagePullSecrets: @@ -70,149 +64,20 @@ spec: env: - name: TZ value: {{ .Values.timezone }} - - name: DOLPHINSCHEDULER_OPTS - valueFrom: - configMapKeyRef: - key: DOLPHINSCHEDULER_OPTS - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: XLS_FILE_PATH - valueFrom: - configMapKeyRef: - key: XLS_FILE_PATH - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: MAIL_SERVER_HOST - valueFrom: - configMapKeyRef: - key: MAIL_SERVER_HOST - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: MAIL_SERVER_PORT - valueFrom: - configMapKeyRef: - key: MAIL_SERVER_PORT - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: MAIL_SENDER - valueFrom: - configMapKeyRef: - key: MAIL_SENDER - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: MAIL_USER - valueFrom: - configMapKeyRef: - key: MAIL_USER - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: MAIL_PASSWD - valueFrom: - configMapKeyRef: - key: MAIL_PASSWD - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: MAIL_SMTP_STARTTLS_ENABLE - valueFrom: - configMapKeyRef: - key: MAIL_SMTP_STARTTLS_ENABLE - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: MAIL_SMTP_SSL_ENABLE - valueFrom: - configMapKeyRef: - key: MAIL_SMTP_SSL_ENABLE - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: MAIL_SMTP_SSL_TRUST - valueFrom: - configMapKeyRef: - key: MAIL_SMTP_SSL_TRUST - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: ENTERPRISE_WECHAT_ENABLE - valueFrom: - configMapKeyRef: - key: ENTERPRISE_WECHAT_ENABLE - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: ENTERPRISE_WECHAT_CORP_ID - valueFrom: - configMapKeyRef: - key: ENTERPRISE_WECHAT_CORP_ID - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: ENTERPRISE_WECHAT_SECRET - valueFrom: - configMapKeyRef: - key: ENTERPRISE_WECHAT_SECRET - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: ENTERPRISE_WECHAT_AGENT_ID - valueFrom: - configMapKeyRef: - key: ENTERPRISE_WECHAT_AGENT_ID - name: {{ include "dolphinscheduler.fullname" . 
}}-alert - - name: ENTERPRISE_WECHAT_USERS - valueFrom: - configMapKeyRef: - key: ENTERPRISE_WECHAT_USERS - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: DATABASE_TYPE - {{- if .Values.postgresql.enabled }} - value: "postgresql" - {{- else }} - value: {{ .Values.externalDatabase.type | quote }} - {{- end }} - - name: DATABASE_DRIVER - {{- if .Values.postgresql.enabled }} - value: "org.postgresql.Driver" - {{- else }} - value: {{ .Values.externalDatabase.driver | quote }} - {{- end }} - - name: DATABASE_HOST - {{- if .Values.postgresql.enabled }} - value: {{ template "dolphinscheduler.postgresql.fullname" . }} - {{- else }} - value: {{ .Values.externalDatabase.host | quote }} - {{- end }} - - name: DATABASE_PORT - {{- if .Values.postgresql.enabled }} - value: "5432" - {{- else }} - value: {{ .Values.externalDatabase.port | quote }} - {{- end }} - - name: DATABASE_USERNAME - {{- if .Values.postgresql.enabled }} - value: {{ .Values.postgresql.postgresqlUsername }} - {{- else }} - value: {{ .Values.externalDatabase.username | quote }} - {{- end }} - - name: DATABASE_PASSWORD - valueFrom: - secretKeyRef: - {{- if .Values.postgresql.enabled }} - name: {{ template "dolphinscheduler.postgresql.fullname" . }} - key: postgresql-password - {{- else }} - name: {{ include "dolphinscheduler.fullname" . }}-externaldb - key: database-password - {{- end }} - - name: DATABASE_DATABASE - {{- if .Values.postgresql.enabled }} - value: {{ .Values.postgresql.postgresqlDatabase }} - {{- else }} - value: {{ .Values.externalDatabase.database | quote }} - {{- end }} - - name: DATABASE_PARAMS - {{- if .Values.postgresql.enabled }} - value: "characterEncoding=utf8" - {{- else }} - value: {{ .Values.externalDatabase.params | quote }} - {{- end }} + {{- include "dolphinscheduler.database.env_vars" . | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "dolphinscheduler.fullname" . }}-common + - configMapRef: + name: {{ include "dolphinscheduler.fullname" . 
}}-alert {{- if .Values.alert.resources }} resources: - limits: - memory: {{ .Values.alert.resources.limits.memory }} - cpu: {{ .Values.alert.resources.limits.cpu }} - requests: - memory: {{ .Values.alert.resources.requests.memory }} - cpu: {{ .Values.alert.resources.requests.cpu }} + {{- toYaml .Values.alert.resources | nindent 12 }} {{- end }} {{- if .Values.alert.livenessProbe.enabled }} livenessProbe: exec: - command: - - sh - - /root/checkpoint.sh - - AlertServer + command: ["bash", "/root/checkpoint.sh", "AlertServer"] initialDelaySeconds: {{ .Values.alert.livenessProbe.initialDelaySeconds }} periodSeconds: {{ .Values.alert.livenessProbe.periodSeconds }} timeoutSeconds: {{ .Values.alert.livenessProbe.timeoutSeconds }} @@ -222,10 +87,7 @@ spec: {{- if .Values.alert.readinessProbe.enabled }} readinessProbe: exec: - command: - - sh - - /root/checkpoint.sh - - AlertServer + command: ["bash", "/root/checkpoint.sh", "AlertServer"] initialDelaySeconds: {{ .Values.alert.readinessProbe.initialDelaySeconds }} periodSeconds: {{ .Values.alert.readinessProbe.periodSeconds }} timeoutSeconds: {{ .Values.alert.readinessProbe.timeoutSeconds }} diff --git a/docker/kubernetes/dolphinscheduler/templates/deployment-dolphinscheduler-api.yaml b/docker/kubernetes/dolphinscheduler/templates/deployment-dolphinscheduler-api.yaml index b9fb5e71c2..65b5dea810 100644 --- a/docker/kubernetes/dolphinscheduler/templates/deployment-dolphinscheduler-api.yaml +++ b/docker/kubernetes/dolphinscheduler/templates/deployment-dolphinscheduler-api.yaml @@ -19,18 +19,12 @@ kind: Deployment metadata: name: {{ include "dolphinscheduler.fullname" . }}-api labels: - app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-api - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} - app.kubernetes.io/component: api + {{- include "dolphinscheduler.api.labels" . | nindent 4 }} spec: replicas: {{ .Values.api.replicas }} selector: matchLabels: - app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-api - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} - app.kubernetes.io/component: api + {{- include "dolphinscheduler.api.labels" . | nindent 6 }} strategy: type: {{ .Values.api.strategy.type | quote }} rollingUpdate: @@ -39,23 +33,23 @@ spec: template: metadata: labels: - app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-api - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} - app.kubernetes.io/component: api - {{- if .Values.alert.annotations }} + {{- include "dolphinscheduler.api.labels" . | nindent 8 }} + {{- if .Values.api.annotations }} annotations: - {{- toYaml .Values.alert.annotations | nindent 8 }} + {{- toYaml .Values.api.annotations | nindent 8 }} {{- end }} spec: {{- if .Values.api.affinity }} - affinity: {{- toYaml .Values.api.affinity | nindent 8 }} + affinity: + {{- toYaml .Values.api.affinity | nindent 8 }} {{- end }} {{- if .Values.api.nodeSelector }} - nodeSelector: {{- toYaml .Values.api.nodeSelector | nindent 8 }} + nodeSelector: + {{- toYaml .Values.api.nodeSelector | nindent 8 }} {{- end }} {{- if .Values.api.tolerations }} - tolerations: {{- toYaml . 
| nindent 8 }} + tolerations: + {{- toYaml .Values.api.tolerations | nindent 8 }} {{- end }} {{- if .Values.image.pullSecret }} imagePullSecrets: @@ -73,125 +67,22 @@ spec: env: - name: TZ value: {{ .Values.timezone }} - - name: DOLPHINSCHEDULER_OPTS - valueFrom: - configMapKeyRef: - key: DOLPHINSCHEDULER_OPTS - name: {{ include "dolphinscheduler.fullname" . }}-api - - name: DATABASE_TYPE - {{- if .Values.postgresql.enabled }} - value: "postgresql" - {{- else }} - value: {{ .Values.externalDatabase.type | quote }} - {{- end }} - - name: DATABASE_DRIVER - {{- if .Values.postgresql.enabled }} - value: "org.postgresql.Driver" - {{- else }} - value: {{ .Values.externalDatabase.driver | quote }} - {{- end }} - - name: DATABASE_HOST - {{- if .Values.postgresql.enabled }} - value: {{ template "dolphinscheduler.postgresql.fullname" . }} - {{- else }} - value: {{ .Values.externalDatabase.host | quote }} - {{- end }} - - name: DATABASE_PORT - {{- if .Values.postgresql.enabled }} - value: "5432" - {{- else }} - value: {{ .Values.externalDatabase.port | quote }} - {{- end }} - - name: DATABASE_USERNAME - {{- if .Values.postgresql.enabled }} - value: {{ .Values.postgresql.postgresqlUsername }} - {{- else }} - value: {{ .Values.externalDatabase.username | quote }} - {{- end }} - - name: DATABASE_PASSWORD - valueFrom: - secretKeyRef: - {{- if .Values.postgresql.enabled }} - name: {{ template "dolphinscheduler.postgresql.fullname" . }} - key: postgresql-password - {{- else }} - name: {{ include "dolphinscheduler.fullname" . }}-externaldb - key: database-password - {{- end }} - - name: DATABASE_DATABASE - {{- if .Values.postgresql.enabled }} - value: {{ .Values.postgresql.postgresqlDatabase }} - {{- else }} - value: {{ .Values.externalDatabase.database | quote }} - {{- end }} - - name: DATABASE_PARAMS - {{- if .Values.postgresql.enabled }} - value: "characterEncoding=utf8" - {{- else }} - value: {{ .Values.externalDatabase.params | quote }} - {{- end }} - - name: ZOOKEEPER_QUORUM - {{- if .Values.zookeeper.enabled }} - value: {{ template "dolphinscheduler.zookeeper.quorum" . }} - {{- else }} - value: {{ .Values.externalZookeeper.zookeeperQuorum }} - {{- end }} - - name: ZOOKEEPER_ROOT - {{- if .Values.zookeeper.enabled }} - value: {{ .Values.zookeeper.zookeeperRoot }} - {{- else }} - value: {{ .Values.externalZookeeper.zookeeperRoot }} - {{- end }} - - name: DATA_BASEDIR_PATH - valueFrom: - configMapKeyRef: - name: {{ include "dolphinscheduler.fullname" . }}-common - key: DATA_BASEDIR_PATH - - name: RESOURCE_STORAGE_TYPE - valueFrom: - configMapKeyRef: - key: RESOURCE_STORAGE_TYPE - name: {{ include "dolphinscheduler.fullname" . }}-common - - name: RESOURCE_UPLOAD_PATH - valueFrom: - configMapKeyRef: - key: RESOURCE_UPLOAD_PATH - name: {{ include "dolphinscheduler.fullname" . }}-common - - name: FS_DEFAULT_FS - valueFrom: - configMapKeyRef: - key: FS_DEFAULT_FS - name: {{ include "dolphinscheduler.fullname" . }}-common - {{- if eq .Values.common.configmap.RESOURCE_STORAGE_TYPE "S3" }} - - name: FS_S3A_ENDPOINT - valueFrom: - configMapKeyRef: - key: FS_S3A_ENDPOINT - name: {{ include "dolphinscheduler.fullname" . }}-common - - name: FS_S3A_ACCESS_KEY - valueFrom: - configMapKeyRef: - key: FS_S3A_ACCESS_KEY - name: {{ include "dolphinscheduler.fullname" . }}-common - - name: FS_S3A_SECRET_KEY - valueFrom: - secretKeyRef: - key: fs-s3a-secret-key - name: {{ include "dolphinscheduler.fullname" . }}-fs-s3a - {{- end }} + {{- include "dolphinscheduler.database.env_vars" . 
| nindent 12 }} + {{- include "dolphinscheduler.zookeeper.env_vars" . | nindent 12 }} + {{- include "dolphinscheduler.fs_s3a.env_vars" . | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "dolphinscheduler.fullname" . }}-common + - configMapRef: + name: {{ include "dolphinscheduler.fullname" . }}-api {{- if .Values.api.resources }} resources: - limits: - memory: {{ .Values.api.resources.limits.memory | quote }} - cpu: {{ .Values.api.resources.limits.cpu | quote }} - requests: - memory: {{ .Values.api.resources.requests.memory | quote }} - cpu: {{ .Values.api.resources.requests.cpu | quote }} + {{- toYaml .Values.api.resources | nindent 12 }} {{- end }} {{- if .Values.api.livenessProbe.enabled }} livenessProbe: - tcpSocket: - port: 12345 + exec: + command: ["bash", "/root/checkpoint.sh", "ApiApplicationServer"] initialDelaySeconds: {{ .Values.api.livenessProbe.initialDelaySeconds }} periodSeconds: {{ .Values.api.livenessProbe.periodSeconds }} timeoutSeconds: {{ .Values.api.livenessProbe.timeoutSeconds }} @@ -200,8 +91,8 @@ spec: {{- end }} {{- if .Values.api.readinessProbe.enabled }} readinessProbe: - tcpSocket: - port: 12345 + exec: + command: ["bash", "/root/checkpoint.sh", "ApiApplicationServer"] initialDelaySeconds: {{ .Values.api.readinessProbe.initialDelaySeconds }} periodSeconds: {{ .Values.api.readinessProbe.periodSeconds }} timeoutSeconds: {{ .Values.api.readinessProbe.timeoutSeconds }} @@ -211,10 +102,8 @@ spec: volumeMounts: - mountPath: "/opt/dolphinscheduler/logs" name: {{ include "dolphinscheduler.fullname" . }}-api - {{- if .Values.common.fsFileResourcePersistence.enabled }} - - mountPath: {{ .Values.common.configmap.RESOURCE_UPLOAD_PATH | quote }} - name: {{ include "dolphinscheduler.fullname" . }}-fs-file - {{- end }} + {{- include "dolphinscheduler.sharedStorage.volumeMount" . | nindent 12 }} + {{- include "dolphinscheduler.fsFileResource.volumeMount" . | nindent 12 }} volumes: - name: {{ include "dolphinscheduler.fullname" . }}-api {{- if .Values.api.persistentVolumeClaim.enabled }} @@ -223,8 +112,5 @@ spec: {{- else }} emptyDir: {} {{- end }} - {{- if .Values.common.fsFileResourcePersistence.enabled }} - - name: {{ include "dolphinscheduler.fullname" . }}-fs-file - persistentVolumeClaim: - claimName: {{ include "dolphinscheduler.fullname" . }}-fs-file - {{- end }} \ No newline at end of file + {{- include "dolphinscheduler.sharedStorage.volume" . | nindent 8 }} + {{- include "dolphinscheduler.fsFileResource.volume" . | nindent 8 }} \ No newline at end of file diff --git a/docker/kubernetes/dolphinscheduler/templates/ingress.yaml b/docker/kubernetes/dolphinscheduler/templates/ingress.yaml index 4f76f2b304..7a8d6ac8be 100644 --- a/docker/kubernetes/dolphinscheduler/templates/ingress.yaml +++ b/docker/kubernetes/dolphinscheduler/templates/ingress.yaml @@ -27,8 +27,7 @@ metadata: name: {{ include "dolphinscheduler.fullname" . }} labels: app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }} - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} + {{- include "dolphinscheduler.common.labels" . 
| nindent 4 }} spec: rules: - host: {{ .Values.ingress.host }} diff --git a/docker/kubernetes/dolphinscheduler/templates/pvc-dolphinscheduler-alert.yaml b/docker/kubernetes/dolphinscheduler/templates/pvc-dolphinscheduler-alert.yaml index fd5e88afb9..6c803ad8b6 100644 --- a/docker/kubernetes/dolphinscheduler/templates/pvc-dolphinscheduler-alert.yaml +++ b/docker/kubernetes/dolphinscheduler/templates/pvc-dolphinscheduler-alert.yaml @@ -21,8 +21,7 @@ metadata: name: {{ include "dolphinscheduler.fullname" . }}-alert labels: app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-alert - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} + {{- include "dolphinscheduler.common.labels" . | nindent 4 }} spec: accessModes: {{- range .Values.alert.persistentVolumeClaim.accessModes }} diff --git a/docker/kubernetes/dolphinscheduler/templates/pvc-dolphinscheduler-api.yaml b/docker/kubernetes/dolphinscheduler/templates/pvc-dolphinscheduler-api.yaml index 48292966ad..032c72fe7c 100644 --- a/docker/kubernetes/dolphinscheduler/templates/pvc-dolphinscheduler-api.yaml +++ b/docker/kubernetes/dolphinscheduler/templates/pvc-dolphinscheduler-api.yaml @@ -21,8 +21,7 @@ metadata: name: {{ include "dolphinscheduler.fullname" . }}-api labels: app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-api - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} + {{- include "dolphinscheduler.common.labels" . | nindent 4 }} spec: accessModes: {{- range .Values.api.persistentVolumeClaim.accessModes }} diff --git a/docker/kubernetes/dolphinscheduler/templates/pvc-dolphinscheduler-fs-file.yaml b/docker/kubernetes/dolphinscheduler/templates/pvc-dolphinscheduler-fs-file.yaml index 1e277eb060..8c99f42b82 100644 --- a/docker/kubernetes/dolphinscheduler/templates/pvc-dolphinscheduler-fs-file.yaml +++ b/docker/kubernetes/dolphinscheduler/templates/pvc-dolphinscheduler-fs-file.yaml @@ -21,8 +21,7 @@ metadata: name: {{ include "dolphinscheduler.fullname" . }}-fs-file labels: app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-fs-file - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} + {{- include "dolphinscheduler.common.labels" . | nindent 4 }} annotations: "helm.sh/resource-policy": keep spec: diff --git a/docker/kubernetes/dolphinscheduler/templates/pvc-dolphinscheduler-shared.yaml b/docker/kubernetes/dolphinscheduler/templates/pvc-dolphinscheduler-shared.yaml index fc65efc93d..4bccdb119e 100644 --- a/docker/kubernetes/dolphinscheduler/templates/pvc-dolphinscheduler-shared.yaml +++ b/docker/kubernetes/dolphinscheduler/templates/pvc-dolphinscheduler-shared.yaml @@ -21,8 +21,7 @@ metadata: name: {{ include "dolphinscheduler.fullname" . }}-shared labels: app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-shared - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} + {{- include "dolphinscheduler.common.labels" . 
| nindent 4 }} annotations: "helm.sh/resource-policy": keep spec: diff --git a/docker/kubernetes/dolphinscheduler/templates/secret-external-database.yaml b/docker/kubernetes/dolphinscheduler/templates/secret-external-database.yaml index c15ef14ffc..4d299efbc6 100644 --- a/docker/kubernetes/dolphinscheduler/templates/secret-external-database.yaml +++ b/docker/kubernetes/dolphinscheduler/templates/secret-external-database.yaml @@ -21,8 +21,7 @@ metadata: name: {{ include "dolphinscheduler.fullname" . }}-externaldb labels: app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-externaldb - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} + {{- include "dolphinscheduler.common.labels" . | nindent 4 }} type: Opaque data: database-password: {{ .Values.externalDatabase.password | b64enc | quote }} diff --git a/docker/kubernetes/dolphinscheduler/templates/secret-external-fs-s3a.yaml b/docker/kubernetes/dolphinscheduler/templates/secret-external-fs-s3a.yaml index 722d31fd2d..d84dee970f 100644 --- a/docker/kubernetes/dolphinscheduler/templates/secret-external-fs-s3a.yaml +++ b/docker/kubernetes/dolphinscheduler/templates/secret-external-fs-s3a.yaml @@ -21,8 +21,7 @@ metadata: name: {{ include "dolphinscheduler.fullname" . }}-fs-s3a labels: app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-fs-s3a - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} + {{- include "dolphinscheduler.common.labels" . | nindent 4 }} type: Opaque data: fs-s3a-secret-key: {{ .Values.common.configmap.FS_S3A_SECRET_KEY | b64enc | quote }} diff --git a/docker/kubernetes/dolphinscheduler/templates/statefulset-dolphinscheduler-master.yaml b/docker/kubernetes/dolphinscheduler/templates/statefulset-dolphinscheduler-master.yaml index 92515ca86e..e64f0ac5da 100644 --- a/docker/kubernetes/dolphinscheduler/templates/statefulset-dolphinscheduler-master.yaml +++ b/docker/kubernetes/dolphinscheduler/templates/statefulset-dolphinscheduler-master.yaml @@ -19,40 +19,34 @@ kind: StatefulSet metadata: name: {{ include "dolphinscheduler.fullname" . }}-master labels: - app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-master - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} - app.kubernetes.io/component: master + {{- include "dolphinscheduler.master.labels" . | nindent 4 }} spec: podManagementPolicy: {{ .Values.master.podManagementPolicy }} replicas: {{ .Values.master.replicas }} selector: matchLabels: - app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-master - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} - app.kubernetes.io/component: master + {{- include "dolphinscheduler.master.labels" . | nindent 6 }} serviceName: {{ template "dolphinscheduler.fullname" . }}-master-headless template: metadata: labels: - app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-master - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} - app.kubernetes.io/component: master - {{- if .Values.alert.annotations }} + {{- include "dolphinscheduler.master.labels" . 
| nindent 8 }} + {{- if .Values.master.annotations }} annotations: - {{- toYaml .Values.alert.annotations | nindent 8 }} + {{- toYaml .Values.master.annotations | nindent 8 }} {{- end }} spec: {{- if .Values.master.affinity }} - affinity: {{- toYaml .Values.master.affinity | nindent 8 }} + affinity: + {{- toYaml .Values.master.affinity | nindent 8 }} {{- end }} {{- if .Values.master.nodeSelector }} - nodeSelector: {{- toYaml .Values.master.nodeSelector | nindent 8 }} + nodeSelector: + {{- toYaml .Values.master.nodeSelector | nindent 8 }} {{- end }} {{- if .Values.master.tolerations }} - tolerations: {{- toYaml . | nindent 8 }} + tolerations: + {{- toYaml .Values.master.tolerations | nindent 8 }} {{- end }} {{- if .Values.image.pullSecret }} imagePullSecrets: @@ -65,136 +59,27 @@ spec: args: - "master-server" ports: - - containerPort: {{ .Values.master.configmap.MASTER_LISTEN_PORT }} + - containerPort: 5678 name: "master-port" env: - name: TZ value: {{ .Values.timezone }} - - name: DOLPHINSCHEDULER_OPTS - valueFrom: - configMapKeyRef: - key: DOLPHINSCHEDULER_OPTS - name: {{ include "dolphinscheduler.fullname" . }}-master - - name: MASTER_EXEC_THREADS - valueFrom: - configMapKeyRef: - name: {{ include "dolphinscheduler.fullname" . }}-master - key: MASTER_EXEC_THREADS - - name: MASTER_EXEC_TASK_NUM - valueFrom: - configMapKeyRef: - name: {{ include "dolphinscheduler.fullname" . }}-master - key: MASTER_EXEC_TASK_NUM - - name: MASTER_HEARTBEAT_INTERVAL - valueFrom: - configMapKeyRef: - name: {{ include "dolphinscheduler.fullname" . }}-master - key: MASTER_HEARTBEAT_INTERVAL - - name: MASTER_TASK_COMMIT_RETRYTIMES - valueFrom: - configMapKeyRef: - name: {{ include "dolphinscheduler.fullname" . }}-master - key: MASTER_TASK_COMMIT_RETRYTIMES - - name: MASTER_TASK_COMMIT_INTERVAL - valueFrom: - configMapKeyRef: - name: {{ include "dolphinscheduler.fullname" . }}-master - key: MASTER_TASK_COMMIT_INTERVAL - - name: MASTER_MAX_CPULOAD_AVG - valueFrom: - configMapKeyRef: - name: {{ include "dolphinscheduler.fullname" . }}-master - key: MASTER_MAX_CPULOAD_AVG - - name: MASTER_RESERVED_MEMORY - valueFrom: - configMapKeyRef: - name: {{ include "dolphinscheduler.fullname" . }}-master - key: MASTER_RESERVED_MEMORY - - name: MASTER_LISTEN_PORT - valueFrom: - configMapKeyRef: - name: {{ include "dolphinscheduler.fullname" . }}-master - key: MASTER_LISTEN_PORT - - name: DATABASE_TYPE - {{- if .Values.postgresql.enabled }} - value: "postgresql" - {{- else }} - value: {{ .Values.externalDatabase.type | quote }} - {{- end }} - - name: DATABASE_DRIVER - {{- if .Values.postgresql.enabled }} - value: "org.postgresql.Driver" - {{- else }} - value: {{ .Values.externalDatabase.driver | quote }} - {{- end }} - - name: DATABASE_HOST - {{- if .Values.postgresql.enabled }} - value: {{ template "dolphinscheduler.postgresql.fullname" . }} - {{- else }} - value: {{ .Values.externalDatabase.host | quote }} - {{- end }} - - name: DATABASE_PORT - {{- if .Values.postgresql.enabled }} - value: "5432" - {{- else }} - value: {{ .Values.externalDatabase.port | quote }} - {{- end }} - - name: DATABASE_USERNAME - {{- if .Values.postgresql.enabled }} - value: {{ .Values.postgresql.postgresqlUsername }} - {{- else }} - value: {{ .Values.externalDatabase.username | quote }} - {{- end }} - - name: DATABASE_PASSWORD - valueFrom: - secretKeyRef: - {{- if .Values.postgresql.enabled }} - name: {{ template "dolphinscheduler.postgresql.fullname" . }} - key: postgresql-password - {{- else }} - name: {{ include "dolphinscheduler.fullname" . 
}}-externaldb - key: database-password - {{- end }} - - name: DATABASE_DATABASE - {{- if .Values.postgresql.enabled }} - value: {{ .Values.postgresql.postgresqlDatabase }} - {{- else }} - value: {{ .Values.externalDatabase.database | quote }} - {{- end }} - - name: DATABASE_PARAMS - {{- if .Values.postgresql.enabled }} - value: "characterEncoding=utf8" - {{- else }} - value: {{ .Values.externalDatabase.params | quote }} - {{- end }} - - name: ZOOKEEPER_QUORUM - {{- if .Values.zookeeper.enabled }} - value: {{ template "dolphinscheduler.zookeeper.quorum" . }} - {{- else }} - value: {{ .Values.externalZookeeper.zookeeperQuorum }} - {{- end }} - - name: ZOOKEEPER_ROOT - {{- if .Values.zookeeper.enabled }} - value: {{ .Values.zookeeper.zookeeperRoot }} - {{- else }} - value: {{ .Values.externalZookeeper.zookeeperRoot }} - {{- end }} + {{- include "dolphinscheduler.database.env_vars" . | nindent 12 }} + {{- include "dolphinscheduler.zookeeper.env_vars" . | nindent 12 }} + {{- include "dolphinscheduler.fs_s3a.env_vars" . | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "dolphinscheduler.fullname" . }}-common + - configMapRef: + name: {{ include "dolphinscheduler.fullname" . }}-master {{- if .Values.master.resources }} resources: - limits: - memory: {{ .Values.master.resources.limits.memory | quote }} - cpu: {{ .Values.master.resources.limits.cpu | quote }} - requests: - memory: {{ .Values.master.resources.requests.memory | quote }} - cpu: {{ .Values.master.resources.requests.cpu | quote }} + {{- toYaml .Values.master.resources | nindent 12 }} {{- end }} {{- if .Values.master.livenessProbe.enabled }} livenessProbe: exec: - command: - - sh - - /root/checkpoint.sh - - MasterServer + command: ["bash", "/root/checkpoint.sh", "MasterServer"] initialDelaySeconds: {{ .Values.master.livenessProbe.initialDelaySeconds }} periodSeconds: {{ .Values.master.livenessProbe.periodSeconds }} timeoutSeconds: {{ .Values.master.livenessProbe.timeoutSeconds }} @@ -204,10 +89,7 @@ spec: {{- if .Values.master.readinessProbe.enabled }} readinessProbe: exec: - command: - - sh - - /root/checkpoint.sh - - MasterServer + command: ["bash", "/root/checkpoint.sh", "MasterServer"] initialDelaySeconds: {{ .Values.master.readinessProbe.initialDelaySeconds }} periodSeconds: {{ .Values.master.readinessProbe.periodSeconds }} timeoutSeconds: {{ .Values.master.readinessProbe.timeoutSeconds }} @@ -217,13 +99,7 @@ spec: volumeMounts: - mountPath: "/opt/dolphinscheduler/logs" name: {{ include "dolphinscheduler.fullname" . }}-master - - mountPath: "/opt/dolphinscheduler/conf/env/dolphinscheduler_env.sh" - subPath: "dolphinscheduler_env.sh" - name: {{ include "dolphinscheduler.fullname" . }}-common-env - {{- if .Values.common.sharedStoragePersistence.enabled }} - - mountPath: {{ .Values.common.sharedStoragePersistence.mountPath | quote }} - name: {{ include "dolphinscheduler.fullname" . }}-shared - {{- end }} + {{- include "dolphinscheduler.sharedStorage.volumeMount" . | nindent 12 }} volumes: - name: {{ include "dolphinscheduler.fullname" . }}-master {{- if .Values.master.persistentVolumeClaim.enabled }} @@ -232,18 +108,7 @@ spec: {{- else }} emptyDir: {} {{- end }} - - name: {{ include "dolphinscheduler.fullname" . }}-common-env - configMap: - defaultMode: 0777 - name: {{ include "dolphinscheduler.fullname" . }}-common - items: - - key: DOLPHINSCHEDULER_ENV - path: dolphinscheduler_env.sh - {{- if .Values.common.sharedStoragePersistence.enabled }} - - name: {{ include "dolphinscheduler.fullname" . 
}}-shared - persistentVolumeClaim: - claimName: {{ include "dolphinscheduler.fullname" . }}-shared - {{- end }} + {{- include "dolphinscheduler.sharedStorage.volume" . | nindent 8 }} {{- if .Values.master.persistentVolumeClaim.enabled }} volumeClaimTemplates: - metadata: @@ -255,7 +120,7 @@ spec: spec: accessModes: {{- range .Values.master.persistentVolumeClaim.accessModes }} - - {{ . | quote }} + - {{ . | quote }} {{- end }} storageClassName: {{ .Values.master.persistentVolumeClaim.storageClassName | quote }} resources: diff --git a/docker/kubernetes/dolphinscheduler/templates/statefulset-dolphinscheduler-worker.yaml b/docker/kubernetes/dolphinscheduler/templates/statefulset-dolphinscheduler-worker.yaml index 9a452be943..76cc18f5cc 100644 --- a/docker/kubernetes/dolphinscheduler/templates/statefulset-dolphinscheduler-worker.yaml +++ b/docker/kubernetes/dolphinscheduler/templates/statefulset-dolphinscheduler-worker.yaml @@ -19,40 +19,34 @@ kind: StatefulSet metadata: name: {{ include "dolphinscheduler.fullname" . }}-worker labels: - app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-worker - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} - app.kubernetes.io/component: worker + {{- include "dolphinscheduler.worker.labels" . | nindent 4 }} spec: podManagementPolicy: {{ .Values.worker.podManagementPolicy }} replicas: {{ .Values.worker.replicas }} selector: matchLabels: - app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-worker - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} - app.kubernetes.io/component: worker + {{- include "dolphinscheduler.worker.labels" . | nindent 6 }} serviceName: {{ template "dolphinscheduler.fullname" . }}-worker-headless template: metadata: labels: - app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-worker - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} - app.kubernetes.io/component: worker - {{- if .Values.alert.annotations }} + {{- include "dolphinscheduler.worker.labels" . | nindent 8 }} + {{- if .Values.worker.annotations }} annotations: - {{- toYaml .Values.alert.annotations | nindent 8 }} + {{- toYaml .Values.worker.annotations | nindent 8 }} {{- end }} spec: {{- if .Values.worker.affinity }} - affinity: {{- toYaml .Values.worker.affinity | nindent 8 }} + affinity: + {{- toYaml .Values.worker.affinity | nindent 8 }} {{- end }} {{- if .Values.worker.nodeSelector }} - nodeSelector: {{- toYaml .Values.worker.nodeSelector | nindent 8 }} + nodeSelector: + {{- toYaml .Values.worker.nodeSelector | nindent 8 }} {{- end }} {{- if .Values.worker.tolerations }} - tolerations: {{- toYaml . | nindent 8 }} + tolerations: + {{- toYaml .Values.worker.tolerations | nindent 8 }} {{- end }} {{- if .Values.image.pullSecret }} imagePullSecrets: @@ -65,215 +59,31 @@ spec: args: - "worker-server" ports: - - containerPort: {{ .Values.worker.configmap.WORKER_LISTEN_PORT }} + - containerPort: 1234 name: "worker-port" - containerPort: 50051 name: "logger-port" env: - name: TZ value: {{ .Values.timezone }} - - name: DOLPHINSCHEDULER_OPTS - valueFrom: - configMapKeyRef: - key: DOLPHINSCHEDULER_OPTS - name: {{ include "dolphinscheduler.fullname" . }}-worker - - name: LOGGER_SERVER_OPTS - valueFrom: - configMapKeyRef: - key: LOGGER_SERVER_OPTS - name: {{ include "dolphinscheduler.fullname" . 
}}-worker - - name: WORKER_EXEC_THREADS - valueFrom: - configMapKeyRef: - name: {{ include "dolphinscheduler.fullname" . }}-worker - key: WORKER_EXEC_THREADS - - name: WORKER_HEARTBEAT_INTERVAL - valueFrom: - configMapKeyRef: - name: {{ include "dolphinscheduler.fullname" . }}-worker - key: WORKER_HEARTBEAT_INTERVAL - - name: WORKER_MAX_CPULOAD_AVG - valueFrom: - configMapKeyRef: - name: {{ include "dolphinscheduler.fullname" . }}-worker - key: WORKER_MAX_CPULOAD_AVG - - name: WORKER_RESERVED_MEMORY - valueFrom: - configMapKeyRef: - name: {{ include "dolphinscheduler.fullname" . }}-worker - key: WORKER_RESERVED_MEMORY - - name: WORKER_LISTEN_PORT - valueFrom: - configMapKeyRef: - name: {{ include "dolphinscheduler.fullname" . }}-worker - key: WORKER_LISTEN_PORT - - name: WORKER_GROUPS - valueFrom: - configMapKeyRef: - name: {{ include "dolphinscheduler.fullname" . }}-worker - key: WORKER_GROUPS - - name: XLS_FILE_PATH - valueFrom: - configMapKeyRef: - key: XLS_FILE_PATH - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: MAIL_SERVER_HOST - valueFrom: - configMapKeyRef: - key: MAIL_SERVER_HOST - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: MAIL_SERVER_PORT - valueFrom: - configMapKeyRef: - key: MAIL_SERVER_PORT - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: MAIL_SENDER - valueFrom: - configMapKeyRef: - key: MAIL_SENDER - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: MAIL_USER - valueFrom: - configMapKeyRef: - key: MAIL_USER - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: MAIL_PASSWD - valueFrom: - configMapKeyRef: - key: MAIL_PASSWD - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: MAIL_SMTP_STARTTLS_ENABLE - valueFrom: - configMapKeyRef: - key: MAIL_SMTP_STARTTLS_ENABLE - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: MAIL_SMTP_SSL_ENABLE - valueFrom: - configMapKeyRef: - key: MAIL_SMTP_SSL_ENABLE - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: MAIL_SMTP_SSL_TRUST - valueFrom: - configMapKeyRef: - key: MAIL_SMTP_SSL_TRUST - name: {{ include "dolphinscheduler.fullname" . }}-alert - - name: DATABASE_TYPE - {{- if .Values.postgresql.enabled }} - value: "postgresql" - {{- else }} - value: {{ .Values.externalDatabase.type | quote }} - {{- end }} - - name: DATABASE_DRIVER - {{- if .Values.postgresql.enabled }} - value: "org.postgresql.Driver" - {{- else }} - value: {{ .Values.externalDatabase.driver | quote }} - {{- end }} - - name: DATABASE_HOST - {{- if .Values.postgresql.enabled }} - value: {{ template "dolphinscheduler.postgresql.fullname" . }} - {{- else }} - value: {{ .Values.externalDatabase.host | quote }} - {{- end }} - - name: DATABASE_PORT - {{- if .Values.postgresql.enabled }} - value: "5432" - {{- else }} - value: {{ .Values.externalDatabase.port | quote }} - {{- end }} - - name: DATABASE_USERNAME - {{- if .Values.postgresql.enabled }} - value: {{ .Values.postgresql.postgresqlUsername }} - {{- else }} - value: {{ .Values.externalDatabase.username | quote }} - {{- end }} - - name: DATABASE_PASSWORD - valueFrom: - secretKeyRef: - {{- if .Values.postgresql.enabled }} - name: {{ template "dolphinscheduler.postgresql.fullname" . }} - key: postgresql-password - {{- else }} - name: {{ include "dolphinscheduler.fullname" . 
}}-externaldb - key: database-password - {{- end }} - - name: DATABASE_DATABASE - {{- if .Values.postgresql.enabled }} - value: {{ .Values.postgresql.postgresqlDatabase }} - {{- else }} - value: {{ .Values.externalDatabase.database | quote }} - {{- end }} - - name: DATABASE_PARAMS - {{- if .Values.postgresql.enabled }} - value: "characterEncoding=utf8" - {{- else }} - value: {{ .Values.externalDatabase.params | quote }} - {{- end }} - - name: ZOOKEEPER_QUORUM - {{- if .Values.zookeeper.enabled }} - value: {{ template "dolphinscheduler.zookeeper.quorum" . }} - {{- else }} - value: {{ .Values.externalZookeeper.zookeeperQuorum }} - {{- end }} - - name: ZOOKEEPER_ROOT - {{- if .Values.zookeeper.enabled }} - value: {{ .Values.zookeeper.zookeeperRoot }} - {{- else }} - value: {{ .Values.externalZookeeper.zookeeperRoot }} - {{- end }} - - name: DATA_BASEDIR_PATH - valueFrom: - configMapKeyRef: - name: {{ include "dolphinscheduler.fullname" . }}-common - key: DATA_BASEDIR_PATH - - name: RESOURCE_STORAGE_TYPE - valueFrom: - configMapKeyRef: - key: RESOURCE_STORAGE_TYPE - name: {{ include "dolphinscheduler.fullname" . }}-common - - name: RESOURCE_UPLOAD_PATH - valueFrom: - configMapKeyRef: - key: RESOURCE_UPLOAD_PATH - name: {{ include "dolphinscheduler.fullname" . }}-common - - name: FS_DEFAULT_FS - valueFrom: - configMapKeyRef: - key: FS_DEFAULT_FS - name: {{ include "dolphinscheduler.fullname" . }}-common - {{- if eq .Values.common.configmap.RESOURCE_STORAGE_TYPE "S3" }} - - name: FS_S3A_ENDPOINT - valueFrom: - configMapKeyRef: - key: FS_S3A_ENDPOINT - name: {{ include "dolphinscheduler.fullname" . }}-common - - name: FS_S3A_ACCESS_KEY - valueFrom: - configMapKeyRef: - key: FS_S3A_ACCESS_KEY - name: {{ include "dolphinscheduler.fullname" . }}-common - - name: FS_S3A_SECRET_KEY - valueFrom: - secretKeyRef: - key: fs-s3a-secret-key - name: {{ include "dolphinscheduler.fullname" . }}-fs-s3a - {{- end }} + {{- include "dolphinscheduler.database.env_vars" . | nindent 12 }} + {{- include "dolphinscheduler.zookeeper.env_vars" . | nindent 12 }} + {{- include "dolphinscheduler.fs_s3a.env_vars" . | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "dolphinscheduler.fullname" . }}-common + - configMapRef: + name: {{ include "dolphinscheduler.fullname" . }}-worker + - configMapRef: + name: {{ include "dolphinscheduler.fullname" . 
}}-alert {{- if .Values.worker.resources }} resources: - limits: - memory: {{ .Values.worker.resources.limits.memory | quote }} - cpu: {{ .Values.worker.resources.limits.cpu | quote }} - requests: - memory: {{ .Values.worker.resources.requests.memory | quote }} - cpu: {{ .Values.worker.resources.requests.cpu | quote }} + {{- toYaml .Values.worker.resources | nindent 12 }} {{- end }} {{- if .Values.worker.livenessProbe.enabled }} livenessProbe: exec: - command: - - sh - - /root/checkpoint.sh - - WorkerServer + command: ["bash", "/root/checkpoint.sh", "WorkerServer"] initialDelaySeconds: {{ .Values.worker.livenessProbe.initialDelaySeconds }} periodSeconds: {{ .Values.worker.livenessProbe.periodSeconds }} timeoutSeconds: {{ .Values.worker.livenessProbe.timeoutSeconds }} @@ -283,10 +93,7 @@ spec: {{- if .Values.worker.readinessProbe.enabled }} readinessProbe: exec: - command: - - sh - - /root/checkpoint.sh - - WorkerServer + command: ["bash", "/root/checkpoint.sh", "WorkerServer"] initialDelaySeconds: {{ .Values.worker.readinessProbe.initialDelaySeconds }} periodSeconds: {{ .Values.worker.readinessProbe.periodSeconds }} timeoutSeconds: {{ .Values.worker.readinessProbe.timeoutSeconds }} @@ -298,17 +105,8 @@ spec: name: {{ include "dolphinscheduler.fullname" . }}-worker-data - mountPath: "/opt/dolphinscheduler/logs" name: {{ include "dolphinscheduler.fullname" . }}-worker-logs - - mountPath: "/opt/dolphinscheduler/conf/env/dolphinscheduler_env.sh" - subPath: "dolphinscheduler_env.sh" - name: {{ include "dolphinscheduler.fullname" . }}-common-env - {{- if .Values.common.sharedStoragePersistence.enabled }} - - mountPath: {{ .Values.common.sharedStoragePersistence.mountPath | quote }} - name: {{ include "dolphinscheduler.fullname" . }}-shared - {{- end }} - {{- if .Values.common.fsFileResourcePersistence.enabled }} - - mountPath: {{ .Values.common.configmap.RESOURCE_UPLOAD_PATH | quote }} - name: {{ include "dolphinscheduler.fullname" . }}-fs-file - {{- end }} + {{- include "dolphinscheduler.sharedStorage.volumeMount" . | nindent 12 }} + {{- include "dolphinscheduler.fsFileResource.volumeMount" . | nindent 12 }} volumes: - name: {{ include "dolphinscheduler.fullname" . }}-worker-data {{- if .Values.worker.persistentVolumeClaim.dataPersistentVolume.enabled }} @@ -324,23 +122,8 @@ spec: {{- else }} emptyDir: {} {{- end }} - - name: {{ include "dolphinscheduler.fullname" . }}-common-env - configMap: - defaultMode: 0777 - name: {{ include "dolphinscheduler.fullname" . }}-common - items: - - key: DOLPHINSCHEDULER_ENV - path: dolphinscheduler_env.sh - {{- if .Values.common.sharedStoragePersistence.enabled }} - - name: {{ include "dolphinscheduler.fullname" . }}-shared - persistentVolumeClaim: - claimName: {{ include "dolphinscheduler.fullname" . }}-shared - {{- end }} - {{- if .Values.common.fsFileResourcePersistence.enabled }} - - name: {{ include "dolphinscheduler.fullname" . }}-fs-file - persistentVolumeClaim: - claimName: {{ include "dolphinscheduler.fullname" . }}-fs-file - {{- end }} + {{- include "dolphinscheduler.sharedStorage.volume" . | nindent 8 }} + {{- include "dolphinscheduler.fsFileResource.volume" . 
| nindent 8 }} {{- if .Values.worker.persistentVolumeClaim.enabled }} volumeClaimTemplates: {{- if .Values.worker.persistentVolumeClaim.dataPersistentVolume.enabled }} diff --git a/docker/kubernetes/dolphinscheduler/templates/svc-dolphinscheduler-api.yaml b/docker/kubernetes/dolphinscheduler/templates/svc-dolphinscheduler-api.yaml index 5daa7c47df..8d7fe80370 100644 --- a/docker/kubernetes/dolphinscheduler/templates/svc-dolphinscheduler-api.yaml +++ b/docker/kubernetes/dolphinscheduler/templates/svc-dolphinscheduler-api.yaml @@ -20,16 +20,35 @@ metadata: name: {{ include "dolphinscheduler.fullname" . }}-api labels: app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-api - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} + {{- include "dolphinscheduler.common.labels" . | nindent 4 }} + {{- if and (eq .Values.api.service.type "LoadBalancer") .Values.api.service.annotations }} + annotations: + {{- range $key, $value := .Values.api.service.annotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} spec: + type: {{ .Values.api.service.type }} + {{- if and (eq .Values.api.service.type "ClusterIP") .Values.api.service.clusterIP }} + clusterIP: {{ .Values.api.service.clusterIP }} + {{- end }} ports: - port: 12345 targetPort: api-port + {{- if and (eq .Values.api.service.type "NodePort") .Values.api.service.nodePort }} + nodePort: {{ .Values.api.service.nodePort }} + {{- end }} protocol: TCP name: api-port + {{- if .Values.api.service.externalIPs }} + externalIPs: + {{- toYaml .Values.api.service.externalIPs | nindent 4 }} + {{- end }} + {{- if and (eq .Values.api.service.type "ExternalName") .Values.api.service.externalName }} + externalName: {{ .Values.api.service.externalName }} + {{- end }} + {{- if and (eq .Values.api.service.type "LoadBalancer") .Values.api.service.loadBalancerIP }} + loadBalancerIP: {{ .Values.api.service.loadBalancerIP }} + {{- end }} selector: - app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-api - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} - app.kubernetes.io/component: api \ No newline at end of file + {{- include "dolphinscheduler.api.labels" . | nindent 4 }} \ No newline at end of file diff --git a/docker/kubernetes/dolphinscheduler/templates/svc-dolphinscheduler-master-headless.yaml b/docker/kubernetes/dolphinscheduler/templates/svc-dolphinscheduler-master-headless.yaml index 0df9d01725..9fb21ca664 100644 --- a/docker/kubernetes/dolphinscheduler/templates/svc-dolphinscheduler-master-headless.yaml +++ b/docker/kubernetes/dolphinscheduler/templates/svc-dolphinscheduler-master-headless.yaml @@ -20,17 +20,13 @@ metadata: name: {{ include "dolphinscheduler.fullname" . }}-master-headless labels: app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-master-headless - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} + {{- include "dolphinscheduler.common.labels" . | nindent 4 }} spec: clusterIP: "None" ports: - - port: {{ .Values.master.configmap.MASTER_LISTEN_PORT }} + - port: 5678 targetPort: master-port protocol: TCP name: master-port selector: - app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-master - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} - app.kubernetes.io/component: master \ No newline at end of file + {{- include "dolphinscheduler.master.labels" . 
| nindent 4 }} \ No newline at end of file diff --git a/docker/kubernetes/dolphinscheduler/templates/svc-dolphinscheduler-worker-headless.yaml b/docker/kubernetes/dolphinscheduler/templates/svc-dolphinscheduler-worker-headless.yaml index 5dd5478e26..34664fefb7 100644 --- a/docker/kubernetes/dolphinscheduler/templates/svc-dolphinscheduler-worker-headless.yaml +++ b/docker/kubernetes/dolphinscheduler/templates/svc-dolphinscheduler-worker-headless.yaml @@ -20,12 +20,11 @@ metadata: name: {{ include "dolphinscheduler.fullname" . }}-worker-headless labels: app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-worker-headless - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} + {{- include "dolphinscheduler.common.labels" . | nindent 4 }} spec: clusterIP: "None" ports: - - port: {{ .Values.worker.configmap.WORKER_LISTEN_PORT }} + - port: 1234 targetPort: worker-port protocol: TCP name: worker-port @@ -34,7 +33,4 @@ spec: protocol: TCP name: logger-port selector: - app.kubernetes.io/name: {{ include "dolphinscheduler.fullname" . }}-worker - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} - app.kubernetes.io/component: worker \ No newline at end of file + {{- include "dolphinscheduler.worker.labels" . | nindent 4 }} \ No newline at end of file diff --git a/docker/kubernetes/dolphinscheduler/values.yaml b/docker/kubernetes/dolphinscheduler/values.yaml index cc8e7090e4..b307bc1b1f 100644 --- a/docker/kubernetes/dolphinscheduler/values.yaml +++ b/docker/kubernetes/dolphinscheduler/values.yaml @@ -54,8 +54,6 @@ externalDatabase: zookeeper: enabled: true fourlwCommandsWhitelist: "srvr,ruok,wchs,cons" - service: - port: "2181" persistence: enabled: false size: "20Gi" @@ -71,6 +69,7 @@ externalZookeeper: common: ## ConfigMap configmap: + DOLPHINSCHEDULER_OPTS: "" DATA_BASEDIR_PATH: "/tmp/dolphinscheduler" RESOURCE_STORAGE_TYPE: "HDFS" RESOURCE_UPLOAD_PATH: "/dolphinscheduler" @@ -78,17 +77,29 @@ common: FS_S3A_ENDPOINT: "s3.xxx.amazonaws.com" FS_S3A_ACCESS_KEY: "xxxxxxx" FS_S3A_SECRET_KEY: "xxxxxxx" - DOLPHINSCHEDULER_ENV: - - "export HADOOP_HOME=/opt/soft/hadoop" - - "export HADOOP_CONF_DIR=/opt/soft/hadoop/etc/hadoop" - - "export SPARK_HOME1=/opt/soft/spark1" - - "export SPARK_HOME2=/opt/soft/spark2" - - "export PYTHON_HOME=/usr/bin/python" - - "export JAVA_HOME=/usr/lib/jvm/java-1.8-openjdk" - - "export HIVE_HOME=/opt/soft/hive" - - "export FLINK_HOME=/opt/soft/flink" - - "export DATAX_HOME=/opt/soft/datax" - - "export PATH=$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$PYTHON_HOME:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH" + HADOOP_SECURITY_AUTHENTICATION_STARTUP_STATE: "false" + JAVA_SECURITY_KRB5_CONF_PATH: "/opt/krb5.conf" + LOGIN_USER_KEYTAB_USERNAME: "hdfs@HADOOP.COM" + LOGIN_USER_KEYTAB_PATH: "/opt/hdfs.keytab" + KERBEROS_EXPIRE_TIME: "2" + HDFS_ROOT_USER: "hdfs" + YARN_RESOURCEMANAGER_HA_RM_IDS: "" + YARN_APPLICATION_STATUS_ADDRESS: "http://ds1:8088/ws/v1/cluster/apps/%s" + # skywalking + SKYWALKING_ENABLE: "false" + SW_AGENT_COLLECTOR_BACKEND_SERVICES: "127.0.0.1:11800" + SW_GRPC_LOG_SERVER_HOST: "127.0.0.1" + SW_GRPC_LOG_SERVER_PORT: "11800" + # dolphinscheduler env + HADOOP_HOME: "/opt/soft/hadoop" + HADOOP_CONF_DIR: "/opt/soft/hadoop/etc/hadoop" + SPARK_HOME1: "/opt/soft/spark1" + SPARK_HOME2: "/opt/soft/spark2" + PYTHON_HOME: "/usr/bin/python" + JAVA_HOME: "/usr/local/openjdk-8" + HIVE_HOME: "/opt/soft/hive" + FLINK_HOME: 
"/opt/soft/flink" + DATAX_HOME: "/opt/soft/datax" ## Shared storage persistence mounted into master and worker, such as Hadoop, Spark, Flink and DataX binary package sharedStoragePersistence: enabled: false @@ -137,15 +148,16 @@ master: # cpu: "500m" ## ConfigMap configmap: - DOLPHINSCHEDULER_OPTS: "" + MASTER_SERVER_OPTS: "-Xms1g -Xmx1g -Xmn512m" MASTER_EXEC_THREADS: "100" MASTER_EXEC_TASK_NUM: "20" + MASTER_DISPATCH_TASK_NUM: "3" + MASTER_HOST_SELECTOR: "LowerWeight" MASTER_HEARTBEAT_INTERVAL: "10" MASTER_TASK_COMMIT_RETRYTIMES: "5" MASTER_TASK_COMMIT_INTERVAL: "1000" - MASTER_MAX_CPULOAD_AVG: "100" - MASTER_RESERVED_MEMORY: "0.1" - MASTER_LISTEN_PORT: "5678" + MASTER_MAX_CPULOAD_AVG: "-1" + MASTER_RESERVED_MEMORY: "0.3" ## Periodic probe of container liveness. Container will be restarted if the probe fails. Cannot be updated. ## More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes livenessProbe: @@ -205,13 +217,12 @@ worker: # cpu: "500m" ## ConfigMap configmap: - DOLPHINSCHEDULER_OPTS: "" - LOGGER_SERVER_OPTS: "" + LOGGER_SERVER_OPTS: "-Xms512m -Xmx512m -Xmn256m" + WORKER_SERVER_OPTS: "-Xms1g -Xmx1g -Xmn512m" WORKER_EXEC_THREADS: "100" WORKER_HEARTBEAT_INTERVAL: "10" - WORKER_MAX_CPULOAD_AVG: "100" - WORKER_RESERVED_MEMORY: "0.1" - WORKER_LISTEN_PORT: "1234" + WORKER_MAX_CPULOAD_AVG: "-1" + WORKER_RESERVED_MEMORY: "0.3" WORKER_GROUPS: "default" ## Periodic probe of container liveness. Container will be restarted if the probe fails. Cannot be updated. ## More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes @@ -286,7 +297,7 @@ alert: # cpu: "500m" ## ConfigMap configmap: - DOLPHINSCHEDULER_OPTS: "" + ALERT_SERVER_OPTS: "-Xms512m -Xmx512m -Xmn256m" XLS_FILE_PATH: "/tmp/xls" MAIL_SERVER_HOST: "" MAIL_SERVER_PORT: "" @@ -362,7 +373,7 @@ api: # cpu: "500m" ## ConfigMap configmap: - DOLPHINSCHEDULER_OPTS: "" + API_SERVER_OPTS: "-Xms512m -Xmx512m -Xmn256m" ## Periodic probe of container liveness. Container will be restarted if the probe fails. Cannot be updated. ## More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes livenessProbe: @@ -389,6 +400,22 @@ api: - "ReadWriteOnce" storageClassName: "-" storage: "20Gi" + service: + ## type determines how the Service is exposed. Defaults to ClusterIP. Valid options are ExternalName, ClusterIP, NodePort, and LoadBalancer + type: "ClusterIP" + ## clusterIP is the IP address of the service and is usually assigned randomly by the master + clusterIP: "" + ## nodePort is the port on each node on which this service is exposed when type=NodePort + nodePort: "" + ## externalIPs is a list of IP addresses for which nodes in the cluster will also accept traffic for this service + externalIPs: [] + ## externalName is the external reference that kubedns or equivalent will return as a CNAME record for this service, requires Type to be ExternalName + externalName: "" + ## loadBalancerIP when service.type is LoadBalancer. 
LoadBalancer will get created with the IP specified in this field + loadBalancerIP: "" + ## annotations may need to be set when service.type is LoadBalancer + ## service.beta.kubernetes.io/aws-load-balancer-ssl-cert: arn:aws:acm:us-east-1:EXAMPLE_CERT + annotations: {} ingress: enabled: false diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java index 76f27b8aad..ec57ef0ee0 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java @@ -96,7 +96,6 @@ public final class Constants { * yarn.resourcemanager.ha.rm.ids */ public static final String YARN_RESOURCEMANAGER_HA_RM_IDS = "yarn.resourcemanager.ha.rm.ids"; - public static final String YARN_RESOURCEMANAGER_HA_XX = "xx"; /** @@ -147,7 +146,6 @@ public final class Constants { * development.state */ public static final String DEVELOPMENT_STATE = "development.state"; - public static final String DEVELOPMENT_STATE_DEFAULT_VALUE = "true"; /** * string true @@ -811,11 +809,6 @@ public final class Constants { */ public static final String LOGIN_USER_KEY_TAB_USERNAME = "login.user.keytab.username"; - /** - * default worker group id - */ - public static final int DEFAULT_WORKER_ID = -1; - /** * loginUserFromKeytab path */ diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/HadoopUtils.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/HadoopUtils.java index f586b206c6..ffead0b298 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/HadoopUtils.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/HadoopUtils.java @@ -16,27 +16,29 @@ */ package org.apache.dolphinscheduler.common.utils; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; +import static org.apache.dolphinscheduler.common.Constants.RESOURCE_UPLOAD_PATH; + import org.apache.dolphinscheduler.common.Constants; import org.apache.dolphinscheduler.common.enums.ExecutionStatus; import org.apache.dolphinscheduler.common.enums.ResUploadType; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.JSONException; -import com.alibaba.fastjson.JSONObject; -import org.apache.commons.io.IOUtils; import org.apache.dolphinscheduler.common.enums.ResourceType; + +import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.*; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.client.cli.RMAdminCLI; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import java.io.*; +import java.io.BufferedReader; +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; import java.nio.file.Files; import java.security.PrivilegedExceptionAction; import java.util.Collections; @@ -46,7 +48,15 @@ import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import java.util.stream.Stream; -import static 
diff --git a/dolphinscheduler-common/src/main/resources/common.properties b/dolphinscheduler-common/src/main/resources/common.properties
index 84f800833a..5bb7c39289 100644
--- a/dolphinscheduler-common/src/main/resources/common.properties
+++ b/dolphinscheduler-common/src/main/resources/common.properties
@@ -15,16 +15,16 @@
 # limitations under the License.
 #
 
-# resource storage type : HDFS, S3, NONE
+# user data local directory path, please make sure the directory exists and have read write permissions
+data.basedir.path=/tmp/dolphinscheduler
+
+# resource storage type: HDFS, S3, NONE
 resource.storage.type=NONE
 
-# resource store on HDFS/S3 path, resource file will store to this hadoop hdfs path, self configuration, please make sure the directory exists on hdfs and have read write permissions。"/dolphinscheduler" is recommended
+# resource store on HDFS/S3 path, resource file will store to this hadoop hdfs path, self configuration, please make sure the directory exists on hdfs and have read write permissions. "/dolphinscheduler" is recommended
 resource.upload.path=/dolphinscheduler
 
-# user data local directory path, please make sure the directory exists and have read write permissions
-#data.basedir.path=/tmp/dolphinscheduler
-
-# whether kerberos starts
+# whether to start up kerberos
 hadoop.security.authentication.startup.state=false
 
 # java.security.krb5.conf path
@@ -36,13 +36,16 @@ login.user.keytab.username=hdfs-mycluster@ESZ.COM
 # login user from keytab path
 login.user.keytab.path=/opt/hdfs.headless.keytab
 
-#resource.view.suffixs
+# kerberos expire time, the unit is hour
+kerberos.expire.time=2
+
+# resource view suffixs
 #resource.view.suffixs=txt,log,sh,bat,conf,cfg,py,java,sql,xml,hql,properties,json,yml,yaml,ini,js
 
-# if resource.storage.type=HDFS, the user need to have permission to create directories under the HDFS root path
+# if resource.storage.type=HDFS, the user must have the permission to create directories under the HDFS root path
 hdfs.root.user=hdfs
 
-# if resource.storage.type=S3, the value like: s3a://dolphinscheduler; if resource.storage.type=HDFS, When namenode HA is enabled, you need to copy core-site.xml and hdfs-site.xml to conf dir
+# if resource.storage.type=S3, the value like: s3a://dolphinscheduler; if resource.storage.type=HDFS and namenode HA is enabled, you need to copy core-site.xml and hdfs-site.xml to conf dir
 fs.defaultFS=hdfs://mycluster:8020
 
 # if resource.storage.type=S3, s3 endpoint
@@ -54,13 +57,14 @@ fs.s3a.access.key=A3DXS30FO22544RE
 # if resource.storage.type=S3, s3 secret key
 fs.s3a.secret.key=OloCLq3n+8+sdPHUhJ21XrSxTC+JK
 
-# if resourcemanager HA enable, please type the HA ips ; if resourcemanager is single, make this value empty
+# if resourcemanager HA is enabled, please set the HA IPs; if resourcemanager is single, keep this value empty
 yarn.resourcemanager.ha.rm.ids=192.168.xx.xx,192.168.xx.xx
 
-# if resourcemanager HA enable or not use resourcemanager, please keep the default value; If resourcemanager is single, you only need to replace ds1 to actual resourcemanager hostname.
+# if resourcemanager HA is enabled or resourcemanager is not used, please keep the default value; if resourcemanager is single, you only need to replace ds1 with the actual resourcemanager hostname
 yarn.application.status.address=http://ds1:8088/ws/v1/cluster/apps/%s
 
 # system env path
 #dolphinscheduler.env.path=env/dolphinscheduler_env.sh
+
+# development state
 development.state=false
-kerberos.expire.time=1
diff --git a/dolphinscheduler-dist/src/main/assembly/dolphinscheduler-binary.xml b/dolphinscheduler-dist/src/main/assembly/dolphinscheduler-binary.xml
index 3e282bddd7..9201fe1fd0 100644
--- a/dolphinscheduler-dist/src/main/assembly/dolphinscheduler-binary.xml
+++ b/dolphinscheduler-dist/src/main/assembly/dolphinscheduler-binary.xml
@@ -156,7 +156,7 @@
                 <include>**/*.*</include>
             </includes>
-            <outputDirectory>./skywalking-agent</outputDirectory>
+            <outputDirectory>./ext/skywalking-agent</outputDirectory>
         </fileSet>
diff --git a/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterConfig.java b/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterConfig.java
index efd29ddd3c..357aeead51 100644
--- a/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterConfig.java
+++ b/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterConfig.java
@@ -25,21 +25,27 @@ import org.springframework.stereotype.Component;
 @PropertySource(value = "master.properties")
 public class MasterConfig {
 
+    @Value("${master.listen.port:5678}")
+    private int listenPort;
+
     @Value("${master.exec.threads:100}")
     private int masterExecThreads;
 
     @Value("${master.exec.task.num:20}")
     private int masterExecTaskNum;
 
+    @Value("${master.dispatch.task.num:3}")
+    private int masterDispatchTaskNumber;
+
+    @Value("${master.host.selector:LowerWeight}")
+    private String hostSelector;
+
     @Value("${master.heartbeat.interval:10}")
     private int masterHeartbeatInterval;
 
     @Value("${master.task.commit.retryTimes:5}")
     private int masterTaskCommitRetryTimes;
 
-    @Value("${master.dispatch.task.num :3}")
-    private int masterDispatchTaskNumber;
-
     @Value("${master.task.commit.interval:1000}")
     private int masterTaskCommitInterval;
 
@@ -49,12 +55,6 @@ public class MasterConfig {
     @Value("${master.reserved.memory:0.3}")
     private double masterReservedMemory;
 
-    @Value("${master.host.selector:lowerWeight}")
-    private String hostSelector;
-
-    @Value("${master.listen.port:5678}")
-    private int listenPort;
-
     public int getListenPort() {
         return listenPort;
     }
diff --git a/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerConfig.java b/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerConfig.java
index 2dedaf8e1b..c866935904 100644
--- a/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerConfig.java
+++ b/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerConfig.java
@@ -17,9 +17,10 @@
  */
 package org.apache.dolphinscheduler.server.worker.config;
 
+import org.apache.dolphinscheduler.common.Constants;
+
 import java.util.Set;
 
-import org.apache.dolphinscheduler.common.Constants;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.context.annotation.PropertySource;
 import org.springframework.stereotype.Component;
@@ -28,15 +29,15 @@ import org.springframework.stereotype.Component;
 @PropertySource(value = "worker.properties")
 public class WorkerConfig {
 
+    @Value("${worker.listen.port:1234}")
+    private int listenPort;
+
     @Value("${worker.exec.threads:100}")
     private int workerExecThreads;
 
     @Value("${worker.heartbeat.interval:10}")
     private int workerHeartbeatInterval;
 
-    @Value("${worker.fetch.task.num:3}")
-    private int workerFetchTaskNum;
-
     @Value("${worker.max.cpuload.avg:-1}")
     private int workerMaxCpuloadAvg;
 
@@ -46,9 +47,6 @@ public class WorkerConfig {
     @Value("#{'${worker.groups:default}'.split(',')}")
     private Set<String> workerGroups;
 
-    @Value("${worker.listen.port: 1234}")
-    private int listenPort;
-
     public int getListenPort() {
         return listenPort;
     }
@@ -81,14 +79,6 @@ public class WorkerConfig {
         this.workerHeartbeatInterval = workerHeartbeatInterval;
     }
 
-    public int getWorkerFetchTaskNum() {
-        return workerFetchTaskNum;
-    }
-
-    public void setWorkerFetchTaskNum(int workerFetchTaskNum) {
-        this.workerFetchTaskNum = workerFetchTaskNum;
-    }
-
     public double getWorkerReservedMemory() {
         return workerReservedMemory;
     }
@Value("${worker.exec.threads:100}") private int workerExecThreads; @Value("${worker.heartbeat.interval:10}") private int workerHeartbeatInterval; - @Value("${worker.fetch.task.num:3}") - private int workerFetchTaskNum; - @Value("${worker.max.cpuload.avg:-1}") private int workerMaxCpuloadAvg; @@ -46,9 +47,6 @@ public class WorkerConfig { @Value("#{'${worker.groups:default}'.split(',')}") private Set workerGroups; - @Value("${worker.listen.port: 1234}") - private int listenPort; - public int getListenPort() { return listenPort; } @@ -81,14 +79,6 @@ public class WorkerConfig { this.workerHeartbeatInterval = workerHeartbeatInterval; } - public int getWorkerFetchTaskNum() { - return workerFetchTaskNum; - } - - public void setWorkerFetchTaskNum(int workerFetchTaskNum) { - this.workerFetchTaskNum = workerFetchTaskNum; - } - public double getWorkerReservedMemory() { return workerReservedMemory; } diff --git a/dolphinscheduler-server/src/main/resources/master.properties b/dolphinscheduler-server/src/main/resources/master.properties index 01d1189507..3edc481c73 100644 --- a/dolphinscheduler-server/src/main/resources/master.properties +++ b/dolphinscheduler-server/src/main/resources/master.properties @@ -15,7 +15,10 @@ # limitations under the License. # -# master execute thread num +# master listen port +#master.listen.port=5678 + +# master execute thread number #master.exec.threads=100 # master execute task number in parallel @@ -24,6 +27,9 @@ # master dispatch task number #master.dispatch.task.num=3 +# master host selector to select a suitable worker, default value: LowerWeight. Optional values include Random, RoundRobin, LowerWeight +#master.host.selector=LowerWeight + # master heartbeat interval #master.heartbeat.interval=10 @@ -33,11 +39,8 @@ # master commit task interval #master.task.commit.interval=1000 -# only less than cpu avg load, master server can work. default value -1 : the number of cpu cores * 2 +# only less than cpu avg load, master server can work. default value -1: the number of cpu cores * 2 #master.max.cpuload.avg=-1 -# only larger than reserved memory, master server can work. default value : physical memory * 1/10, unit is G. +# only larger than reserved memory, master server can work. default value 0.3, the unit is G #master.reserved.memory=0.3 - -# master listen port -#master.listen.port=5678 diff --git a/dolphinscheduler-server/src/main/resources/worker.properties b/dolphinscheduler-server/src/main/resources/worker.properties index 395143b027..dc61097185 100644 --- a/dolphinscheduler-server/src/main/resources/worker.properties +++ b/dolphinscheduler-server/src/main/resources/worker.properties @@ -15,20 +15,20 @@ # limitations under the License. # -# worker execute thread num +# worker listener port +#worker.listen.port=1234 + +# worker execute thread number #worker.exec.threads=100 # worker heartbeat interval #worker.heartbeat.interval=10 # only less than cpu avg load, worker server can work. default value -1: the number of cpu cores * 2 -#worker.max.cpuload.avg= -1 +#worker.max.cpuload.avg=-1 -# only larger than reserved memory, worker server can work. default value : physical memory * 1/6, unit is G. +# only larger than reserved memory, worker server can work. 
diff --git a/script/dolphinscheduler-daemon.sh b/script/dolphinscheduler-daemon.sh
index 287a4ff7ab..358eded4eb 100755
--- a/script/dolphinscheduler-daemon.sh
+++ b/script/dolphinscheduler-daemon.sh
@@ -33,7 +33,7 @@ echo "Begin $startStop $command......"
 BIN_DIR=`dirname $0`
 BIN_DIR=`cd "$BIN_DIR"; pwd`
-DOLPHINSCHEDULER_HOME=$BIN_DIR/..
+DOLPHINSCHEDULER_HOME=`cd "$BIN_DIR/.."; pwd`
 
 source /etc/profile
 
@@ -57,44 +57,39 @@ pid=$DOLPHINSCHEDULER_PID_DIR/dolphinscheduler-$command.pid
 
 cd $DOLPHINSCHEDULER_HOME
 
+export DOLPHINSCHEDULER_OPTS="-server -XX:MetaspaceSize=128m -XX:MaxMetaspaceSize=128m -Xss512k -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:LargePageSizeInBytes=128m -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -XX:+PrintGCDetails -Xloggc:$DOLPHINSCHEDULER_LOG_DIR/gc.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=dump.hprof -XshowSettings:vm $DOLPHINSCHEDULER_OPTS"
+
 if [ "$command" = "api-server" ]; then
-  HEAP_INITIAL_SIZE=1g
-  HEAP_MAX_SIZE=1g
-  HEAP_NEW_GENERATION_SIZE=512m
   LOG_FILE="-Dlogging.config=classpath:logback-api.xml -Dspring.profiles.active=api"
   CLASS=org.apache.dolphinscheduler.api.ApiApplicationServer
+  HEAP_OPTS="-Xms1g -Xmx1g -Xmn512m"
+  export DOLPHINSCHEDULER_OPTS="$HEAP_OPTS $DOLPHINSCHEDULER_OPTS $API_SERVER_OPTS"
 elif [ "$command" = "master-server" ]; then
-  HEAP_INITIAL_SIZE=4g
-  HEAP_MAX_SIZE=4g
-  HEAP_NEW_GENERATION_SIZE=2g
   LOG_FILE="-Dlogging.config=classpath:logback-master.xml -Ddruid.mysql.usePingMethod=false"
   CLASS=org.apache.dolphinscheduler.server.master.MasterServer
+  HEAP_OPTS="-Xms4g -Xmx4g -Xmn2g"
+  export DOLPHINSCHEDULER_OPTS="$HEAP_OPTS $DOLPHINSCHEDULER_OPTS $MASTER_SERVER_OPTS"
 elif [ "$command" = "worker-server" ]; then
-  HEAP_INITIAL_SIZE=2g
-  HEAP_MAX_SIZE=2g
-  HEAP_NEW_GENERATION_SIZE=1g
   LOG_FILE="-Dlogging.config=classpath:logback-worker.xml -Ddruid.mysql.usePingMethod=false"
   CLASS=org.apache.dolphinscheduler.server.worker.WorkerServer
+  HEAP_OPTS="-Xms2g -Xmx2g -Xmn1g"
+  export DOLPHINSCHEDULER_OPTS="$HEAP_OPTS $DOLPHINSCHEDULER_OPTS $WORKER_SERVER_OPTS"
 elif [ "$command" = "alert-server" ]; then
-  HEAP_INITIAL_SIZE=1g
-  HEAP_MAX_SIZE=1g
-  HEAP_NEW_GENERATION_SIZE=512m
   LOG_FILE="-Dlogback.configurationFile=conf/logback-alert.xml"
   CLASS=org.apache.dolphinscheduler.alert.AlertServer
+  HEAP_OPTS="-Xms1g -Xmx1g -Xmn512m"
+  export DOLPHINSCHEDULER_OPTS="$HEAP_OPTS $DOLPHINSCHEDULER_OPTS $ALERT_SERVER_OPTS"
 elif [ "$command" = "logger-server" ]; then
-  HEAP_INITIAL_SIZE=1g
-  HEAP_MAX_SIZE=1g
-  HEAP_NEW_GENERATION_SIZE=512m
   CLASS=org.apache.dolphinscheduler.server.log.LoggerServer
+  HEAP_OPTS="-Xms1g -Xmx1g -Xmn512m"
+  export DOLPHINSCHEDULER_OPTS="$HEAP_OPTS $DOLPHINSCHEDULER_OPTS $LOGGER_SERVER_OPTS"
 else
-  echo "Error: No command named \`$command' was found."
+  echo "Error: No command named '$command' was found."
   exit 1
 fi
 
-export DOLPHINSCHEDULER_OPTS="-server -Xms$HEAP_INITIAL_SIZE -Xmx$HEAP_MAX_SIZE -Xmn$HEAP_NEW_GENERATION_SIZE -XX:MetaspaceSize=128m -XX:MaxMetaspaceSize=128m -Xss512k -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:LargePageSizeInBytes=128m -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -XX:+PrintGCDetails -Xloggc:$DOLPHINSCHEDULER_LOG_DIR/gc.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=dump.hprof -XshowSettings:vm $DOLPHINSCHEDULER_OPTS"
-
 if [ "$SKYWALKING_ENABLE" = "true" ]; then
-  SKYWALKING_OPTS="-javaagent:$DOLPHINSCHEDULER_HOME/skywalking-agent/skywalking-agent.jar -DSW_AGENT_NAME=dolphinscheduler::$command -DSW_LOGGING_FILE_NAME=$DOLPHINSCHEDULER_LOG_DIR/skywalking-dolphinscheduler-$command.log"
+  SKYWALKING_OPTS="-javaagent:$DOLPHINSCHEDULER_HOME/ext/skywalking-agent/skywalking-agent.jar -DSW_AGENT_NAME=dolphinscheduler::$command -DSW_LOGGING_FILE_NAME=skywalking-dolphinscheduler-$command.log"
   export DOLPHINSCHEDULER_OPTS="$DOLPHINSCHEDULER_OPTS $SKYWALKING_OPTS"
   echo "Info: Skywalking enabled opts: $SKYWALKING_OPTS"
 else
@@ -105,11 +100,7 @@ case $startStop in
   (start)
     if [ "$DOCKER" = "true" ]; then
      echo start $command in docker
-      export DOLPHINSCHEDULER_OPTS="$DOLPHINSCHEDULER_OPTS -XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap"
-      if [ "$command" = "logger-server" ]; then
-        LOGGER_SERVER_OPTS=${LOGGER_SERVER_OPTS:-"-Xms$HEAP_INITIAL_SIZE -Xmx$HEAP_MAX_SIZE -Xmn$HEAP_NEW_GENERATION_SIZE"}
-        export DOLPHINSCHEDULER_OPTS="$DOLPHINSCHEDULER_OPTS $LOGGER_SERVER_OPTS"
-      fi
+      export DOLPHINSCHEDULER_OPTS="$DOLPHINSCHEDULER_OPTS -XX:+UseContainerSupport -XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap"
      exec_command="$LOG_FILE $DOLPHINSCHEDULER_OPTS -classpath $DOLPHINSCHEDULER_CONF_DIR:$DOLPHINSCHEDULER_LIB_JARS $CLASS"
      $JAVA_HOME/bin/java $exec_command
     else
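Editor's note: with this rewrite the per-service JVM tuning comes from plain environment variables (API_SERVER_OPTS, MASTER_SERVER_OPTS, WORKER_SERVER_OPTS, ALERT_SERVER_OPTS, LOGGER_SERVER_OPTS) rather than the old hard-coded HEAP_* variables. A hedged example of raising the master heap on a bare-metal install (the bin/ path assumes the binary distribution layout):

    # override the built-in -Xms4g -Xmx4g -Xmn2g master default
    export MASTER_SERVER_OPTS="-Xms8g -Xmx8g -Xmn4g"
    ./bin/dolphinscheduler-daemon.sh start master-server

Because $MASTER_SERVER_OPTS is appended after the default heap flags, the JVM honours the later -Xms/-Xmx values.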
diff --git a/script/scp-hosts.sh b/script/scp-hosts.sh
index 50a94f9b0f..258ebe5325 100755
--- a/script/scp-hosts.sh
+++ b/script/scp-hosts.sh
@@ -54,7 +54,7 @@ do
   echo "scp dirs to $host/$installPath starting"
   ssh -p $sshPort $host "cd $installPath/; rm -rf bin/ conf/ lib/ script/ sql/ ui/"
-  for dsDir in bin conf lib script sql ui skywalking-agent install.sh
+  for dsDir in bin conf ext lib script sql ui install.sh
   do
     # if worker in workersGroupMap
     if [[ "${workersGroupMap[${host}]}" ]] && [[ "${dsDir}" == "conf" ]]; then
diff --git a/tools/dependencies/check-LICENSE.sh b/tools/dependencies/check-LICENSE.sh
index 123727a024..b3fdfc2520 100755
--- a/tools/dependencies/check-LICENSE.sh
+++ b/tools/dependencies/check-LICENSE.sh
@@ -27,9 +27,9 @@ echo '=== Self modules: ' && ./mvnw --batch-mode --quiet -Dexec.executable='echo
 
 echo '=== Distributed dependencies: ' && ls dist/lib | tee all-dependencies.txt
 
-echo '=== Skywalking agent dependencies: ' && ls dist/skywalking-agent | grep .jar | tee -a all-dependencies.txt \
-    && ls dist/skywalking-agent/plugins | tee -a all-dependencies.txt \
-    && ls dist/skywalking-agent/activations | tee -a all-dependencies.txt
+echo '=== Skywalking agent dependencies: ' && ls dist/ext/skywalking-agent | grep .jar | tee -a all-dependencies.txt \
+    && ls dist/ext/skywalking-agent/plugins | tee -a all-dependencies.txt \
+    && ls dist/ext/skywalking-agent/activations | tee -a all-dependencies.txt
 
 # Exclude all self modules(jars) to generate all third-party dependencies
 echo '=== Third party dependencies: ' && grep -vf self-modules.txt all-dependencies.txt | tee third-party-dependencies.txt
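Editor's note: with the agent relocated to ext/skywalking-agent, enabling tracing on a bare-metal install only needs the SKYWALKING_ENABLE switch read by dolphinscheduler-daemon.sh. The collector address variable below is the standard SkyWalking agent setting and the address is a sample value, as is the bin/ path of the binary distribution:

    # enable the SkyWalking agent for the api server (collector address is a sample value)
    export SKYWALKING_ENABLE=true
    export SW_AGENT_COLLECTOR_BACKEND_SERVICES=127.0.0.1:11800
    ./bin/dolphinscheduler-daemon.sh start api-server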