# # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # Default values for dolphinscheduler-chart. # This is a YAML-formatted file. # Declare variables to be passed into your templates. timezone: "Asia/Shanghai" image: registry: "dolphinscheduler.docker.scarf.sh/apache" tag: "3.1.8" pullPolicy: "IfNotPresent" pullSecret: "" master: dolphinscheduler-master worker: dolphinscheduler-worker api: dolphinscheduler-api alert: dolphinscheduler-alert-server tools: dolphinscheduler-tools ## If not exists external database, by default, Dolphinscheduler's database will use it. postgresql: enabled: true postgresqlUsername: "root" postgresqlPassword: "root" postgresqlDatabase: "dolphinscheduler" params: "characterEncoding=utf8" persistence: enabled: false size: "20Gi" storageClass: "-" mysql: enabled: false auth: username: "ds" password: "ds" database: "dolphinscheduler" params: "characterEncoding=utf8" primary: persistence: enabled: false size: "20Gi" storageClass: "-" ## If exists external database, and set postgresql.enable value to false. ## external database will be used, otherwise Dolphinscheduler's database will be used. externalDatabase: type: "postgresql" host: "localhost" port: "5432" username: "root" password: "root" database: "dolphinscheduler" params: "characterEncoding=utf8" ## If not exists external registry, the zookeeper registry will be used by default. zookeeper: enabled: true service: port: 2181 fourlwCommandsWhitelist: "srvr,ruok,wchs,cons" persistence: enabled: false size: "20Gi" storageClass: "-" ## If exists external registry and set zookeeper.enable value to false, the external registry will be used. externalRegistry: registryPluginDir: "lib/plugin/registry" registryPluginName: "zookeeper" registryServers: "127.0.0.1:2181" conf: common: # user data local directory path, please make sure the directory exists and have read write permissions data.basedir.path: /tmp/dolphinscheduler # resource storage type: HDFS, S3, NONE resource.storage.type: HDFS # resource store on HDFS/S3 path, resource file will store to this base path, self configuration, please make sure the directory exists on hdfs and have read write permissions. "/dolphinscheduler" is recommended resource.storage.upload.base.path: /dolphinscheduler # whether to startup kerberos hadoop.security.authentication.startup.state: false # java.security.krb5.conf path java.security.krb5.conf.path: /opt/krb5.conf # login user from keytab username login.user.keytab.username: hdfs-mycluster@ESZ.COM # login user from keytab path login.user.keytab.path: /opt/hdfs.headless.keytab # kerberos expire time, the unit is hour kerberos.expire.time: 2 # resource view suffixs #resource.view.suffixs: txt,log,sh,bat,conf,cfg,py,java,sql,xml,hql,properties,json,yml,yaml,ini,js # if resource.storage.type=HDFS, the user must have the permission to create directories under the HDFS root path resource.hdfs.root.user: hdfs # if resource.storage.type=S3, the value like: s3a://dolphinscheduler; if resource.storage.type=HDFS and namenode HA is enabled, you need to copy core-site.xml and hdfs-site.xml to conf dir resource.hdfs.fs.defaultFS: hdfs://mycluster:8020 # The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required resource.aws.access.key.id: minioadmin # The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required resource.aws.secret.access.key: minioadmin # The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required resource.aws.region: cn-north-1 # The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name. resource.aws.s3.bucket.name: dolphinscheduler # You need to set this parameter when private cloud s3. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn resource.aws.s3.endpoint: http://localhost:9000 # resourcemanager port, the default value is 8088 if not specified resource.manager.httpaddress.port: 8088 # if resourcemanager HA is enabled, please set the HA IPs; if resourcemanager is single, keep this value empty yarn.resourcemanager.ha.rm.ids: 192.168.xx.xx,192.168.xx.xx # if resourcemanager HA is enabled or not use resourcemanager, please keep the default value; If resourcemanager is single, you only need to replace ds1 to actual resourcemanager hostname yarn.application.status.address: http://ds1:%s/ws/v1/cluster/apps/%s # job history status url when application number threshold is reached(default 10000, maybe it was set to 1000) yarn.job.history.status.address: http://ds1:19888/ws/v1/history/mapreduce/jobs/%s # datasource encryption enable datasource.encryption.enable: false # datasource encryption salt datasource.encryption.salt: '!@#$%^&*' # data quality option data-quality.jar.name: dolphinscheduler-data-quality-dev-SNAPSHOT.jar #data-quality.error.output.path: /tmp/data-quality-error-data # Network IP gets priority, default inner outer # Whether hive SQL is executed in the same session support.hive.oneSession: false # use sudo or not, if set true, executing user is tenant user and deploy user needs sudo permissions; if set false, executing user is the deploy user and doesn't need sudo permissions sudo.enable: true # network interface preferred like eth0, default: empty #dolphin.scheduler.network.interface.preferred: # network IP gets priority, default: inner outer #dolphin.scheduler.network.priority.strategy: default # system env path #dolphinscheduler.env.path: dolphinscheduler_env.sh # development state development.state: false # rpc port alert.rpc.port: 50052 # Url endpoint for zeppelin RESTful API zeppelin.rest.url: http://localhost:8080 common: ## Configmap configmap: DOLPHINSCHEDULER_OPTS: "" DATA_BASEDIR_PATH: "/tmp/dolphinscheduler" RESOURCE_UPLOAD_PATH: "/dolphinscheduler" # dolphinscheduler env HADOOP_HOME: "/opt/soft/hadoop" HADOOP_CONF_DIR: "/opt/soft/hadoop/etc/hadoop" SPARK_HOME1: "/opt/soft/spark1" SPARK_HOME2: "/opt/soft/spark2" PYTHON_HOME: "/usr/bin/python" JAVA_HOME: "/opt/java/openjdk" HIVE_HOME: "/opt/soft/hive" FLINK_HOME: "/opt/soft/flink" DATAX_HOME: "/opt/soft/datax" ## Shared storage persistence mounted into api, master and worker, such as Hadoop, Spark, Flink and DataX binary package sharedStoragePersistence: enabled: false mountPath: "/opt/soft" accessModes: - "ReadWriteMany" ## storageClassName must support the access mode: ReadWriteMany storageClassName: "-" storage: "20Gi" ## If RESOURCE_STORAGE_TYPE is HDFS and FS_DEFAULT_FS is file:///, fsFileResourcePersistence should be enabled for resource storage fsFileResourcePersistence: enabled: false accessModes: - "ReadWriteMany" ## storageClassName must support the access mode: ReadWriteMany storageClassName: "-" storage: "20Gi" master: ## PodManagementPolicy controls how pods are created during initial scale up, when replacing pods on nodes, or when scaling down. podManagementPolicy: "Parallel" ## Replicas is the desired number of replicas of the given Template. replicas: "3" ## You can use annotations to attach arbitrary non-identifying metadata to objects. ## Clients such as tools and libraries can retrieve this metadata. annotations: {} ## Affinity is a group of affinity scheduling rules. If specified, the pod's scheduling constraints. ## More info: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.17/#affinity-v1-core affinity: {} ## NodeSelector is a selector which must be true for the pod to fit on a node. ## Selector which must match a node's labels for the pod to be scheduled on that node. ## More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ nodeSelector: {} ## Tolerations are appended (excluding duplicates) to pods running with this RuntimeClass during admission, ## effectively unioning the set of nodes tolerated by the pod and the RuntimeClass. tolerations: [] ## Compute Resources required by this container. Cannot be updated. ## More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container resources: {} # resources: # limits: # memory: "8Gi" # cpu: "4" # requests: # memory: "2Gi" # cpu: "500m" ## Periodic probe of container liveness. Container will be restarted if the probe fails. Cannot be updated. ## More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes livenessProbe: enabled: true initialDelaySeconds: "30" periodSeconds: "30" timeoutSeconds: "5" failureThreshold: "3" successThreshold: "1" ## Periodic probe of container service readiness. Container will be removed from service endpoints if the probe fails. Cannot be updated. ## More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes readinessProbe: enabled: true initialDelaySeconds: "30" periodSeconds: "30" timeoutSeconds: "5" failureThreshold: "3" successThreshold: "1" ## PersistentVolumeClaim represents a reference to a PersistentVolumeClaim in the same namespace. ## The StatefulSet controller is responsible for mapping network identities to claims in a way that maintains the identity of a pod. ## Every claim in this list must have at least one matching (by name) volumeMount in one container in the template. ## A claim in this list takes precedence over any volumes in the template, with the same name. persistentVolumeClaim: enabled: false accessModes: - "ReadWriteOnce" storageClassName: "-" storage: "20Gi" env: JAVA_OPTS: "-Xms1g -Xmx1g -Xmn512m" MASTER_EXEC_THREADS: "100" MASTER_EXEC_TASK_NUM: "20" MASTER_DISPATCH_TASK_NUM: "3" MASTER_HOST_SELECTOR: "LowerWeight" MASTER_HEARTBEAT_INTERVAL: "10s" MASTER_HEARTBEAT_ERROR_THRESHOLD: "5" MASTER_TASK_COMMIT_RETRYTIMES: "5" MASTER_TASK_COMMIT_INTERVAL: "1s" MASTER_STATE_WHEEL_INTERVAL: "5s" MASTER_MAX_CPU_LOAD_AVG: "-1" MASTER_RESERVED_MEMORY: "0.3" MASTER_FAILOVER_INTERVAL: "10m" MASTER_KILL_YARN_JOB_WHEN_HANDLE_FAILOVER: "true" worker: ## PodManagementPolicy controls how pods are created during initial scale up, when replacing pods on nodes, or when scaling down. podManagementPolicy: "Parallel" ## Replicas is the desired number of replicas of the given Template. replicas: "3" ## You can use annotations to attach arbitrary non-identifying metadata to objects. ## Clients such as tools and libraries can retrieve this metadata. annotations: {} ## Affinity is a group of affinity scheduling rules. If specified, the pod's scheduling constraints. ## More info: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.17/#affinity-v1-core affinity: {} ## NodeSelector is a selector which must be true for the pod to fit on a node. ## Selector which must match a node's labels for the pod to be scheduled on that node. ## More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ nodeSelector: {} ## Tolerations are appended (excluding duplicates) to pods running with this RuntimeClass during admission, ## effectively unioning the set of nodes tolerated by the pod and the RuntimeClass. tolerations: [] ## Compute Resources required by this container. Cannot be updated. ## More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container resources: {} # resources: # limits: # memory: "8Gi" # cpu: "4" # requests: # memory: "2Gi" # cpu: "500m" ## Periodic probe of container liveness. Container will be restarted if the probe fails. Cannot be updated. ## More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes livenessProbe: enabled: true initialDelaySeconds: "30" periodSeconds: "30" timeoutSeconds: "5" failureThreshold: "3" successThreshold: "1" ## Periodic probe of container service readiness. Container will be removed from service endpoints if the probe fails. Cannot be updated. ## More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes readinessProbe: enabled: true initialDelaySeconds: "30" periodSeconds: "30" timeoutSeconds: "5" failureThreshold: "3" successThreshold: "1" ## PersistentVolumeClaim represents a reference to a PersistentVolumeClaim in the same namespace. ## The StatefulSet controller is responsible for mapping network identities to claims in a way that maintains the identity of a pod. ## Every claim in this list must have at least one matching (by name) volumeMount in one container in the template. ## A claim in this list takes precedence over any volumes in the template, with the same name. persistentVolumeClaim: enabled: false ## dolphinscheduler data volume dataPersistentVolume: enabled: false accessModes: - "ReadWriteOnce" storageClassName: "-" storage: "20Gi" ## dolphinscheduler logs volume logsPersistentVolume: enabled: false accessModes: - "ReadWriteOnce" storageClassName: "-" storage: "20Gi" env: WORKER_MAX_CPU_LOAD_AVG: "-1" WORKER_RESERVED_MEMORY: "0.3" WORKER_EXEC_THREADS: "100" WORKER_HEARTBEAT_INTERVAL: "10s" WORKER_HEART_ERROR_THRESHOLD: "5" WORKER_HOST_WEIGHT: "100" alert: ## Number of desired pods. This is a pointer to distinguish between explicit zero and not specified. Defaults to 1. replicas: 1 ## The deployment strategy to use to replace existing pods with new ones. strategy: type: "RollingUpdate" rollingUpdate: maxSurge: "25%" maxUnavailable: "25%" ## You can use annotations to attach arbitrary non-identifying metadata to objects. ## Clients such as tools and libraries can retrieve this metadata. annotations: {} ## NodeSelector is a selector which must be true for the pod to fit on a node. ## Selector which must match a node's labels for the pod to be scheduled on that node. ## More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ affinity: {} ## Compute Resources required by this container. Cannot be updated. ## More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container nodeSelector: {} ## Tolerations are appended (excluding duplicates) to pods running with this RuntimeClass during admission, ## effectively unioning the set of nodes tolerated by the pod and the RuntimeClass. tolerations: [] ## Affinity is a group of affinity scheduling rules. If specified, the pod's scheduling constraints. ## More info: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.17/#affinity-v1-core resources: {} # resources: # limits: # memory: "2Gi" # cpu: "1" # requests: # memory: "1Gi" # cpu: "500m" ## Periodic probe of container liveness. Container will be restarted if the probe fails. Cannot be updated. ## More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes livenessProbe: enabled: true initialDelaySeconds: "30" periodSeconds: "30" timeoutSeconds: "5" failureThreshold: "3" successThreshold: "1" ## Periodic probe of container service readiness. Container will be removed from service endpoints if the probe fails. Cannot be updated. ## More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes readinessProbe: enabled: true initialDelaySeconds: "30" periodSeconds: "30" timeoutSeconds: "5" failureThreshold: "3" successThreshold: "1" ## PersistentVolumeClaim represents a reference to a PersistentVolumeClaim in the same namespace. ## More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims persistentVolumeClaim: enabled: false accessModes: - "ReadWriteOnce" storageClassName: "-" storage: "20Gi" env: JAVA_OPTS: "-Xms512m -Xmx512m -Xmn256m" api: ## Number of desired pods. This is a pointer to distinguish between explicit zero and not specified. Defaults to 1. replicas: "1" ## The deployment strategy to use to replace existing pods with new ones. strategy: type: "RollingUpdate" rollingUpdate: maxSurge: "25%" maxUnavailable: "25%" ## You can use annotations to attach arbitrary non-identifying metadata to objects. ## Clients such as tools and libraries can retrieve this metadata. annotations: {} ## NodeSelector is a selector which must be true for the pod to fit on a node. ## Selector which must match a node's labels for the pod to be scheduled on that node. ## More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ affinity: {} ## Compute Resources required by this container. Cannot be updated. ## More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container nodeSelector: {} ## Tolerations are appended (excluding duplicates) to pods running with this RuntimeClass during admission, ## effectively unioning the set of nodes tolerated by the pod and the RuntimeClass. tolerations: [] ## Affinity is a group of affinity scheduling rules. If specified, the pod's scheduling constraints. ## More info: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.17/#affinity-v1-core resources: {} # resources: # limits: # memory: "2Gi" # cpu: "1" # requests: # memory: "1Gi" # cpu: "500m" ## Periodic probe of container liveness. Container will be restarted if the probe fails. Cannot be updated. ## More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes livenessProbe: enabled: true initialDelaySeconds: "30" periodSeconds: "30" timeoutSeconds: "5" failureThreshold: "3" successThreshold: "1" ## Periodic probe of container service readiness. Container will be removed from service endpoints if the probe fails. Cannot be updated. ## More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes readinessProbe: enabled: true initialDelaySeconds: "30" periodSeconds: "30" timeoutSeconds: "5" failureThreshold: "3" successThreshold: "1" ## PersistentVolumeClaim represents a reference to a PersistentVolumeClaim in the same namespace. ## More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims persistentVolumeClaim: enabled: false accessModes: - "ReadWriteOnce" storageClassName: "-" storage: "20Gi" service: ## type determines how the Service is exposed. Defaults to ClusterIP. Valid options are ExternalName, ClusterIP, NodePort, and LoadBalancer type: "ClusterIP" ## clusterIP is the IP address of the service and is usually assigned randomly by the master clusterIP: "" ## nodePort is the port on each node on which this api service is exposed when type=NodePort nodePort: "" ## pythonNodePort is the port on each node on which this python api service is exposed when type=NodePort pythonNodePort: "" ## externalIPs is a list of IP addresses for which nodes in the cluster will also accept traffic for this service externalIPs: [] ## externalName is the external reference that kubedns or equivalent will return as a CNAME record for this service, requires Type to be ExternalName externalName: "" ## loadBalancerIP when service.type is LoadBalancer. LoadBalancer will get created with the IP specified in this field loadBalancerIP: "" ## annotations may need to be set when service.type is LoadBalancer ## service.beta.kubernetes.io/aws-load-balancer-ssl-cert: arn:aws:acm:us-east-1:EXAMPLE_CERT annotations: {} env: JAVA_OPTS: "-Xms512m -Xmx512m -Xmn256m" ingress: enabled: false host: "dolphinscheduler.org" path: "/dolphinscheduler" annotations: {} tls: enabled: false secretName: "dolphinscheduler-tls"