Browse Source

[Feature][Deployment] Add KEDA autoscaler support for worker deployment when deployed in K8S cluster (#13367)

* Add keda autoscaler support for worker deployment when deployed in K8S cluster

* Add mysql scaler for worker autoscaling

* Add docs for worker autoscaler

* Add support for external postgresql db
3.2.0-release
Eric Gao 2 years ago committed by GitHub
parent
commit
366f999167
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 102
      deploy/kubernetes/dolphinscheduler/templates/keda-autoscaler-worker.yaml
  2. 33
      deploy/kubernetes/dolphinscheduler/values.yaml
  3. 37
      docs/docs/en/guide/installation/kubernetes.md
  4. 36
      docs/docs/zh/guide/installation/kubernetes.md

102
deploy/kubernetes/dolphinscheduler/templates/keda-autoscaler-worker.yaml

@ -0,0 +1,102 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#################################
## DolphinScheduler Worker KEDA Scaler
#################################
# Renders a KEDA ScaledObject that autoscales the worker StatefulSet based on
# the number of pending/running task instances in the DolphinScheduler database.
# Only emitted when worker.keda.enabled is true in values.yaml.
{{- if .Values.worker.keda.enabled }}
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: {{ include "dolphinscheduler.fullname" . }}-worker
  labels:
    component: worker-horizontalpodautoscaler
    deploymentName: {{ include "dolphinscheduler.fullname" . }}-worker
spec:
  scaleTargetRef:
    # Workers are deployed as a StatefulSet, not the KEDA default (Deployment).
    kind: StatefulSet
    name: {{ include "dolphinscheduler.fullname" . }}-worker
  pollingInterval: {{ .Values.worker.keda.pollingInterval }}
  cooldownPeriod: {{ .Values.worker.keda.cooldownPeriod }}
  minReplicaCount: {{ .Values.worker.keda.minReplicaCount }}
  maxReplicaCount: {{ .Values.worker.keda.maxReplicaCount }}
  {{- if .Values.worker.keda.advanced }}
  advanced:
{{ toYaml .Values.worker.keda.advanced | indent 4 }}
  {{- end }}
  # This is just an example, you could customize the trigger rule.
  # FYI, check TaskExecutionStatus.java for the human-readable meaning of state values below.
  triggers:
    {{- if .Values.postgresql.enabled }}
    # Bundled PostgreSQL: scale on pending/running tasks divided by per-worker thread count.
    - type: postgresql
      metadata:
        host: {{ template "dolphinscheduler.postgresql.fullname" . }}.{{ .Release.Namespace }}.svc.cluster.local
        port: "5432"
        dbName: "{{ .Values.postgresql.postgresqlDatabase }}"
        userName: "{{ .Values.postgresql.postgresqlUsername }}"
        # Password is read from the worker pod environment rather than inlined.
        passwordFromEnv: SPRING_DATASOURCE_PASSWORD
        sslmode: "disable"
        targetQueryValue: "1"
        query: >-
          SELECT ceil(COUNT(*)::decimal / {{ .Values.worker.env.WORKER_EXEC_THREADS }})
          FROM t_ds_task_instance
          WHERE state IN (0, 1, 8, 12, 17)
    {{- else if .Values.mysql.enabled }}
    # Bundled MySQL: same scaling rule, MySQL scaler syntax.
    - type: mysql
      metadata:
        host: {{ template "dolphinscheduler.mysql.fullname" . }}.{{ .Release.Namespace }}.svc.cluster.local
        port: "3306"
        dbName: "{{ .Values.mysql.auth.database }}"
        username: "{{ .Values.mysql.auth.username }}"
        passwordFromEnv: SPRING_DATASOURCE_PASSWORD
        queryValue: "1"
        query: >-
          SELECT CEIL(COUNT(*) / {{ .Values.worker.env.WORKER_EXEC_THREADS }})
          FROM t_ds_task_instance
          WHERE state IN (0, 1, 8, 12, 17)
    {{- else if .Values.externalDatabase.enabled }}
    {{- if eq .Values.externalDatabase.type "mysql" }}
    - type: mysql
      metadata:
        host: "{{ .Values.externalDatabase.host }}"
        # mysql scaler requests port in string format
        port: "{{ .Values.externalDatabase.port }}"
        dbName: "{{ .Values.externalDatabase.database }}"
        username: "{{ .Values.externalDatabase.username }}"
        passwordFromEnv: SPRING_DATASOURCE_PASSWORD
        queryValue: "1"
        query: >-
          SELECT CEIL(COUNT(*) / {{ .Values.worker.env.WORKER_EXEC_THREADS }})
          FROM t_ds_task_instance
          WHERE state IN (0, 1, 8, 12, 17)
    {{- else if eq .Values.externalDatabase.type "postgresql" }}
    - type: postgresql
      metadata:
        host: "{{ .Values.externalDatabase.host }}"
        port: "{{ .Values.externalDatabase.port }}"
        dbName: "{{ .Values.externalDatabase.database }}"
        userName: "{{ .Values.externalDatabase.username }}"
        passwordFromEnv: SPRING_DATASOURCE_PASSWORD
        sslmode: "disable"
        targetQueryValue: "1"
        query: >-
          SELECT ceil(COUNT(*)::decimal / {{ .Values.worker.env.WORKER_EXEC_THREADS }})
          FROM t_ds_task_instance
          WHERE state IN (0, 1, 8, 12, 17)
    {{- end }}
    {{- end }}
{{- end }}

33
deploy/kubernetes/dolphinscheduler/values.yaml

@ -58,7 +58,7 @@ mysql:
storageClass: "-"
minio:
enabled: false
enabled: true
auth:
rootUser: minioadmin
rootPassword: minioadmin
@ -69,6 +69,7 @@ minio:
## If exists external database, and set postgresql.enable value to false.
## external database will be used, otherwise Dolphinscheduler's database will be used.
externalDatabase:
enabled: false
type: "postgresql"
host: "localhost"
port: "5432"
@ -100,7 +101,7 @@ conf:
data.basedir.path: /tmp/dolphinscheduler
# resource storage type: HDFS, S3, NONE
resource.storage.type: NONE
resource.storage.type: S3
# resource store on HDFS/S3 path, resource file will store to this base path, self configuration, please make sure the directory exists on hdfs and have read write permissions. "/dolphinscheduler" is recommended
resource.storage.upload.base.path: /dolphinscheduler
@ -385,6 +386,34 @@ worker:
WORKER_HEART_ERROR_THRESHOLD: "5"
WORKER_HOST_WEIGHT: "100"
WORKER_GROUPS: "default"
keda:
enabled: false
namespaceLabels: { }
# How often KEDA polls the DolphinScheduler DB to report new scale requests to the HPA
pollingInterval: 5
# How many seconds KEDA will wait before scaling to zero.
# Note that HPA has a separate cooldown period for scale-downs
cooldownPeriod: 30
# Minimum number of workers created by keda
minReplicaCount: 0
# Maximum number of workers created by keda
maxReplicaCount: 3
# Specify HPA related options
advanced: { }
# horizontalPodAutoscalerConfig:
# behavior:
# scaleDown:
# stabilizationWindowSeconds: 300
# policies:
# - type: Percent
# value: 100
# periodSeconds: 15
alert:
## Number of desired pods. This is a pointer to distinguish between explicit zero and not specified. Defaults to 1.

37
docs/docs/en/guide/installation/kubernetes.md

@ -87,6 +87,43 @@ $ kubectl delete pvc -l app.kubernetes.io/instance=dolphinscheduler
> **Note**: Deleting the PVC's will delete all data as well. Please be cautious before doing it.
## [Experimental] Worker Autoscaling
> **Warning**: Currently this is an experimental feature and may not be suitable for production!
`DolphinScheduler` uses [KEDA](https://github.com/kedacore/keda) for worker autoscaling. However, `DolphinScheduler` disables
this feature by default. To turn on worker autoscaling:
Firstly, you need to create a namespace for `KEDA` and install it with `helm`:
```bash
helm repo add kedacore https://kedacore.github.io/charts
helm repo update
kubectl create namespace keda
helm install keda kedacore/keda \
--namespace keda \
--version "v2.0.0"
```
Secondly, you need to set `worker.keda.enabled` to `true` in `values.yaml` or install the chart by:
```bash
helm install dolphinscheduler . --set worker.keda.enabled=true -n <your-namespace-to-deploy-dolphinscheduler>
```
Once autoscaling is enabled, the number of workers will scale between `minReplicaCount` and `maxReplicaCount` based on the states
of your tasks. For example, when there are no tasks running in your `DolphinScheduler` instance, there will be no workers,
which significantly saves resources.
Worker autoscaling feature is compatible with `postgresql` and `mysql` shipped with `DolphinScheduler official helm chart`. If you
use external database, worker autoscaling feature only supports external `mysql` and `postgresql` databases.
If you need to change the value of worker `WORKER_EXEC_THREADS` when using autoscaling feature,
please change `worker.env.WORKER_EXEC_THREADS` in `values.yaml` instead of through `configmap`.
## Configuration
The configuration file is `values.yaml`, and the [Appendix-Configuration](#appendix-configuration) tables lists the configurable parameters of the DolphinScheduler and their default values.

36
docs/docs/zh/guide/installation/kubernetes.md

@ -87,6 +87,42 @@ $ kubectl delete pvc -l app.kubernetes.io/instance=dolphinscheduler
> **注意**: 删除 PVC 也会删除所有数据,请谨慎操作!
## [试验性] worker 自动扩缩容
> **警告**: 目前此功能尚在试验阶段,不建议在生产环境使用!
`DolphinScheduler` 使用 [KEDA](https://github.com/kedacore/keda) 对 worker 进行自动扩缩容。但是 `DolphinScheduler` 默认是不启用该功能的。
您需要做下列配置来启用该功能:
首先您需要创建一个单独的命名空间并使用 `helm` 安装 `KEDA`
```bash
helm repo add kedacore https://kedacore.github.io/charts
helm repo update
kubectl create namespace keda
helm install keda kedacore/keda \
--namespace keda \
--version "v2.0.0"
```
其次,您需要将 `values.yaml` 中的 `worker.keda.enabled` 配置设置成 `true`,或者您可以通过以下命令安装 chart:
```bash
helm install dolphinscheduler . --set worker.keda.enabled=true -n <your-namespace-to-deploy-dolphinscheduler>
```
一旦自动扩缩容功能启用,worker 的数量将基于任务状态在 `minReplicaCount` 和 `maxReplicaCount` 之间弹性扩缩。
举例来说,当您的 `DolphinScheduler` 实例中没有任务在运行时,将不会有 worker。因此,这个功能会显著节约资源,降低您的使用成本。
自动扩缩容功能目前支持 `DolphinScheduler 官方 helm chart` 中自带的 `postgresql` 和 `mysql`。
如果您要使用外部的数据库,自动扩缩容功能目前只支持 `mysql` 和 `postgresql` 类型的外部数据库。
如果您在使用自动扩缩容时需要改变 worker `WORKER_EXEC_THREADS` 的值,请直接在 `values.yaml` 中修改 `worker.env.WORKER_EXEC_THREADS` 的值,
而不要通过 `configmap` 来更新。
## 配置
配置文件为 `values.yaml`,[附录-配置](#appendix-configuration) 表格列出了 DolphinScheduler 的可配置参数及其默认值 <!-- markdown-link-check-disable-line -->

Loading…
Cancel
Save