Browse Source

Add config for defaultTenantEnabled (#15391)

3.2.1-prepare
Wenjun Ruan 11 months ago committed by GitHub
parent
commit
6c1e001edf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 76
      docs/docs/en/architecture/configuration.md
  2. 226
      docs/docs/zh/architecture/configuration.md
  3. 3
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/OSUtils.java
  4. 11
      dolphinscheduler-standalone-server/src/main/resources/application.yaml
  5. 28
      dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/TenantConfig.java
  6. 7
      dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerConfig.java
  7. 11
      dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionContextUtils.java
  8. 11
      dolphinscheduler-worker/src/main/resources/application.yaml

76
docs/docs/en/architecture/configuration.md

@ -279,53 +279,55 @@ Location: `api-server/conf/application.yaml`
Location: `master-server/conf/application.yaml`
|Parameters | Default value| Description|
|--|--|--|
|master.listen-port|5678|master listen port|
|master.fetch-command-num|10|the number of commands fetched by master|
|master.pre-exec-threads|10|master prepare execute thread number to limit handle commands in parallel|
|master.exec-threads|100|master execute thread number to limit process instances in parallel|
|master.dispatch-task-number|3|master dispatch task number per batch|
|master.host-selector|lower_weight|master host selector to select a suitable worker, default value: LowerWeight. Optional values include random, round_robin, lower_weight|
|master.heartbeat-interval|10|master heartbeat interval, the unit is second|
|master.task-commit-retry-times|5|master commit task retry times|
|master.task-commit-interval|1000|master commit task interval, the unit is millisecond|
|master.state-wheel-interval|5|time to check status|
|master.max-cpu-load-avg|1|master max cpuload avg percentage, only higher than the system cpu load average, master server can schedule. default value 1: will use 100% cpu|
|master.reserved-memory|0.3|master reserved memory, only lower than system available memory, master server can schedule. default value 0.3, only the available memory is higher than 30%, master server can schedule.|
|master.failover-interval|10|failover interval, the unit is minute|
|master.kill-application-when-task-failover|true|whether to kill yarn/k8s application when failover taskInstance|
|master.registry-disconnect-strategy.strategy|stop|Used when the master disconnect from registry, default value: stop. Optional values include stop, waiting|
|master.registry-disconnect-strategy.max-waiting-time|100s|Used when the master disconnect from registry, and the disconnect strategy is waiting, this config means the master will waiting to reconnect to registry in given times, and after the waiting times, if the master still cannot connect to registry, will stop itself, if the value is 0s, the Master will wait infinitely|
|master.worker-group-refresh-interval|10s|The interval to refresh worker group from db to memory|
| Parameters | Default value | Description |
|------------------------------------------------------|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| master.listen-port | 5678 | master listen port |
| master.fetch-command-num | 10 | the number of commands fetched by master |
| master.pre-exec-threads | 10 | master prepare execute thread number to limit handle commands in parallel |
| master.exec-threads | 100 | master execute thread number to limit process instances in parallel |
| master.dispatch-task-number | 3 | master dispatch task number per batch |
| master.host-selector | lower_weight | master host selector to select a suitable worker, default value: LowerWeight. Optional values include random, round_robin, lower_weight |
| master.heartbeat-interval | 10 | master heartbeat interval, the unit is second |
| master.task-commit-retry-times | 5 | master commit task retry times |
| master.task-commit-interval | 1000 | master commit task interval, the unit is millisecond |
| master.state-wheel-interval | 5 | time to check status |
| master.max-cpu-load-avg | 1 | master max cpuload avg percentage, only higher than the system cpu load average, master server can schedule. default value 1: will use 100% cpu |
| master.reserved-memory | 0.3 | master reserved memory, only lower than system available memory, master server can schedule. default value 0.3, only the available memory is higher than 30%, master server can schedule. |
| master.failover-interval | 10 | failover interval, the unit is minute |
| master.kill-application-when-task-failover | true | whether to kill yarn/k8s application when failover taskInstance |
| master.registry-disconnect-strategy.strategy | stop | Used when the master disconnect from registry, default value: stop. Optional values include stop, waiting |
| master.registry-disconnect-strategy.max-waiting-time | 100s | Used when the master disconnects from the registry and the disconnect strategy is waiting. The master will wait up to this long to reconnect to the registry; if it still cannot connect after that time, it will stop itself. If the value is 0s, the master will wait indefinitely |
| master.worker-group-refresh-interval | 10s | The interval to refresh worker group from db to memory |
### Worker Server related configuration
Location: `worker-server/conf/application.yaml`
|Parameters | Default value| Description|
|--|--|--|
|worker.listen-port|1234|worker-service listen port|
|worker.exec-threads|100|worker-service execute thread number, used to limit the number of task instances in parallel|
|worker.heartbeat-interval|10|worker-service heartbeat interval, the unit is second|
|worker.host-weight|100|worker host weight to dispatch tasks|
|worker.tenant-auto-create|true|tenant corresponds to the user of the system, which is used by the worker to submit the job. If system does not have this user, it will be automatically created after the parameter worker.tenant.auto.create is true.|
|worker.max-cpu-load-avg|1|worker max cpuload avg, only higher than the system cpu load average, worker server can be dispatched tasks. default value 1: will use 100% cpu.|
|worker.reserved-memory|0.3|worker reserved memory, only lower than system available memory, worker server can be dispatched tasks. default value 0.3, only the available memory is higher than 30%, worker server can receive task.|
|worker.alert-listen-host|localhost|the alert listen host of worker|
|worker.alert-listen-port|50052|the alert listen port of worker|
|worker.registry-disconnect-strategy.strategy|stop|Used when the worker disconnect from registry, default value: stop. Optional values include stop, waiting|
|worker.registry-disconnect-strategy.max-waiting-time|100s|Used when the worker disconnect from registry, and the disconnect strategy is waiting, this config means the worker will waiting to reconnect to registry in given times, and after the waiting times, if the worker still cannot connect to registry, will stop itself, if the value is 0s, will wait infinitely |
|worker.task-execute-threads-full-policy|REJECT|If REJECT, when the task waiting in the worker reaches exec-threads, it will reject the received task and the Master will redispatch it; If CONTINUE, it will put the task into the worker's execution queue and wait for a free thread to start execution|
| Parameters | Default value | Description |
|------------------------------------------------------|---------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| worker.listen-port | 1234 | worker-service listen port |
| worker.exec-threads | 100 | worker-service execute thread number, used to limit the number of task instances in parallel |
| worker.heartbeat-interval | 10 | worker-service heartbeat interval, the unit is second |
| worker.host-weight | 100 | worker host weight to dispatch tasks |
| worker.max-cpu-load-avg | 1 | worker max cpuload avg, only higher than the system cpu load average, worker server can be dispatched tasks. default value 1: will use 100% cpu. |
| worker.reserved-memory | 0.3 | worker reserved memory, only lower than system available memory, worker server can be dispatched tasks. default value 0.3, only the available memory is higher than 30%, worker server can receive task. |
| worker.alert-listen-host | localhost | the alert listen host of worker |
| worker.alert-listen-port | 50052 | the alert listen port of worker |
| worker.registry-disconnect-strategy.strategy | stop | Used when the worker disconnect from registry, default value: stop. Optional values include stop, waiting |
| worker.registry-disconnect-strategy.max-waiting-time | 100s | Used when the worker disconnects from the registry and the disconnect strategy is waiting. The worker will wait up to this long to reconnect to the registry; if it still cannot connect after that time, it will stop itself. If the value is 0s, the worker will wait indefinitely |
| worker.task-execute-threads-full-policy | REJECT | If REJECT, when the task waiting in the worker reaches exec-threads, it will reject the received task and the Master will redispatch it; If CONTINUE, it will put the task into the worker's execution queue and wait for a free thread to start execution |
| worker.tenant-config.auto-create-tenant-enabled | true | Tenant corresponds to the user of the system, which is used by the worker to submit the job. If the system does not have this user, it will be created automatically when this parameter is true. |
| worker.tenant-config.distributed-tenant-enabled | false | When this parameter is true, auto-create-tenant-enabled has no effect and will not automatically create tenants |
| worker.tenant-config.default-tenant-enabled | false | If set true, will use worker bootstrap user as the tenant to execute task when the tenant is `default`. |
### Alert Server related configuration
Location: `alert-server/conf/application.yaml`
|Parameters | Default value| Description|
|--|--|--|
|server.port|50053|the port of Alert Server|
|alert.port|50052|the port of alert|
| Parameters | Default value | Description |
|-------------|---------------|--------------------------|
| server.port | 50053 | the port of Alert Server |
| alert.port | 50052 | the port of alert |
### Quartz related configuration

226
docs/docs/zh/architecture/configuration.md

@ -200,137 +200,145 @@ common.properties配置文件目前主要是配置hadoop/s3/yarn/applicationId
默认配置如下:
| 参数 | 默认值 | 描述 |
|-----------------------------------------------|--|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| data.basedir.path | /tmp/dolphinscheduler | 本地工作目录,用于存放临时文件 |
| resource.storage.type | NONE | 资源文件存储类型: HDFS,S3,OSS,GCS,ABS,NONE |
| resource.upload.path | /dolphinscheduler | 资源文件存储路径 |
| aws.access.key.id | minioadmin | S3 access key |
| aws.secret.access.key | minioadmin | S3 secret access key |
| aws.region | us-east-1 | S3 区域 |
| aws.s3.endpoint | http://minio:9000 | S3 endpoint地址 |
| hdfs.root.user | hdfs | 如果存储类型为HDFS,需要配置拥有对应操作权限的用户 |
| fs.defaultFS | hdfs://mycluster:8020 | 请求地址如果resource.storage.type=S3,该值类似为: s3a://dolphinscheduler. 如果resource.storage.type=HDFS, 如果 hadoop 配置了 HA,需要复制core-site.xml 和 hdfs-site.xml 文件到conf目录 |
| hadoop.security.authentication.startup.state | false | hadoop是否开启kerberos权限 |
| java.security.krb5.conf.path | /opt/krb5.conf | kerberos配置目录 |
| login.user.keytab.username | hdfs-mycluster@ESZ.COM | kerberos登录用户 |
| login.user.keytab.path | /opt/hdfs.headless.keytab | kerberos登录用户keytab |
| kerberos.expire.time | 2 | kerberos过期时间,整数,单位为小时 |
| yarn.resourcemanager.ha.rm.ids | 192.168.xx.xx,192.168.xx.xx | yarn resourcemanager 地址, 如果resourcemanager开启了HA, 输入HA的IP地址(以逗号分隔),如果resourcemanager为单节点, 该值为空即可 |
| yarn.application.status.address | http://ds1:8088/ws/v1/cluster/apps/%s | 如果resourcemanager开启了HA或者没有使用resourcemanager,保持默认值即可. 如果resourcemanager为单节点,你需要将ds1 配置为resourcemanager对应的hostname |
| development.state | false | 是否处于开发模式 |
| dolphin.scheduler.network.interface.preferred | NONE | 将会被使用的网卡名称 |
| dolphin.scheduler.network.interface.restrict | NONE | 禁止使用的网卡名称 |
| dolphin.scheduler.network.priority.strategy | default | ip获取策略 default优先获取内网 |
| resource.manager.httpaddress.port | 8088 | resource manager的端口 |
| 参数 | 默认值 | 描述 |
|-----------------------------------------------|--------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| data.basedir.path | /tmp/dolphinscheduler | 本地工作目录,用于存放临时文件 |
| resource.storage.type | NONE | 资源文件存储类型: HDFS,S3,OSS,GCS,ABS,NONE |
| resource.upload.path | /dolphinscheduler | 资源文件存储路径 |
| aws.access.key.id | minioadmin | S3 access key |
| aws.secret.access.key | minioadmin | S3 secret access key |
| aws.region | us-east-1 | S3 区域 |
| aws.s3.endpoint | http://minio:9000 | S3 endpoint地址 |
| hdfs.root.user | hdfs | 如果存储类型为HDFS,需要配置拥有对应操作权限的用户 |
| fs.defaultFS | hdfs://mycluster:8020 | 请求地址如果resource.storage.type=S3,该值类似为: s3a://dolphinscheduler. 如果resource.storage.type=HDFS, 如果 hadoop 配置了 HA,需要复制core-site.xml 和 hdfs-site.xml 文件到conf目录 |
| hadoop.security.authentication.startup.state | false | hadoop是否开启kerberos权限 |
| java.security.krb5.conf.path | /opt/krb5.conf | kerberos配置目录 |
| login.user.keytab.username | hdfs-mycluster@ESZ.COM | kerberos登录用户 |
| login.user.keytab.path | /opt/hdfs.headless.keytab | kerberos登录用户keytab |
| kerberos.expire.time | 2 | kerberos过期时间,整数,单位为小时 |
| yarn.resourcemanager.ha.rm.ids | 192.168.xx.xx,192.168.xx.xx | yarn resourcemanager 地址, 如果resourcemanager开启了HA, 输入HA的IP地址(以逗号分隔),如果resourcemanager为单节点, 该值为空即可 |
| yarn.application.status.address | http://ds1:8088/ws/v1/cluster/apps/%s | 如果resourcemanager开启了HA或者没有使用resourcemanager,保持默认值即可. 如果resourcemanager为单节点,你需要将ds1 配置为resourcemanager对应的hostname |
| development.state | false | 是否处于开发模式 |
| dolphin.scheduler.network.interface.preferred | NONE | 将会被使用的网卡名称 |
| dolphin.scheduler.network.interface.restrict | NONE | 禁止使用的网卡名称 |
| dolphin.scheduler.network.priority.strategy | default | ip获取策略 default优先获取内网 |
| resource.manager.httpaddress.port | 8088 | resource manager的端口 |
| yarn.job.history.status.address | http://ds1:19888/ws/v1/history/mapreduce/jobs/%s | yarn的作业历史状态URL |
| datasource.encryption.enable | false | 是否启用datasource 加密 |
| datasource.encryption.salt | !@#$%^&* | datasource加密使用的salt |
| data-quality.jar.name | dolphinscheduler-data-quality-dev-SNAPSHOT.jar | 配置数据质量使用的jar包 |
| support.hive.oneSession | false | 设置hive SQL是否在同一个session中执行 |
| sudo.enable | true | 是否开启sudo |
| alert.rpc.port | 50052 | Alert Server的RPC端口 |
| zeppelin.rest.url | http://localhost:8080 | zeppelin RESTful API 接口地址 |
| appId.collect | log | 收集applicationId方式, 如果用aop方法,将配置log替换为aop,并将`bin/env/dolphinscheduler_env.sh`自动收集applicationId相关环境变量配置的注释取消掉,注意:aop不支持远程主机提交yarn作业的方式比如Beeline客户端提交,且如果用户环境覆盖了dolphinscheduler_env.sh收集applicationId相关环境变量配置,aop方法会失效 |
| datasource.encryption.enable | false | 是否启用datasource 加密 |
| datasource.encryption.salt | !@#$%^&* | datasource加密使用的salt |
| data-quality.jar.name | dolphinscheduler-data-quality-dev-SNAPSHOT.jar | 配置数据质量使用的jar包 |
| support.hive.oneSession | false | 设置hive SQL是否在同一个session中执行 |
| sudo.enable | true | 是否开启sudo |
| alert.rpc.port | 50052 | Alert Server的RPC端口 |
| zeppelin.rest.url | http://localhost:8080 | zeppelin RESTful API 接口地址 |
| appId.collect | log | 收集applicationId方式, 如果用aop方法,将配置log替换为aop,并将`bin/env/dolphinscheduler_env.sh`自动收集applicationId相关环境变量配置的注释取消掉,注意:aop不支持远程主机提交yarn作业的方式比如Beeline客户端提交,且如果用户环境覆盖了dolphinscheduler_env.sh收集applicationId相关环境变量配置,aop方法会失效 |
## Api-server相关配置
位置:`api-server/conf/application.yaml`
|参数 |默认值| 描述|
|--|--|--|
|server.port|12345|api服务通讯端口|
|server.servlet.session.timeout|120m|session超时时间|
|server.servlet.context-path|/dolphinscheduler/ |请求路径|
|spring.servlet.multipart.max-file-size|1024MB|最大上传文件大小|
|spring.servlet.multipart.max-request-size|1024MB|最大请求大小|
|server.jetty.max-http-post-size|5000000|jetty服务最大发送请求大小|
|spring.banner.charset|UTF-8|请求编码|
|spring.jackson.time-zone|UTC|设置时区|
|spring.jackson.date-format|"yyyy-MM-dd HH:mm:ss"|设置时间格式|
|spring.messages.basename|i18n/messages|i18n配置|
|security.authentication.type|PASSWORD|权限校验类型|
|security.authentication.ldap.user.admin|read-only-admin|LDAP登陆时,系统管理员账号|
|security.authentication.ldap.urls|ldap://ldap.forumsys.com:389/|LDAP urls|
|security.authentication.ldap.base.dn|dc=example,dc=com|LDAP base dn|
|security.authentication.ldap.username|cn=read-only-admin,dc=example,dc=com|LDAP账号|
|security.authentication.ldap.password|password|LDAP密码|
|security.authentication.ldap.user.identity-attribute|uid|LDAP用户身份标识字段名|
|security.authentication.ldap.user.email-attribute|mail|LDAP邮箱字段名|
|security.authentication.ldap.user.not-exist-action|CREATE|当通过LDAP登陆时用户不存在的操作,默认值是: CREATE,可选值:CREATE、DENY|
|security.authentication.ldap.ssl.enable|false|LDAP ssl开关|
|security.authentication.ldap.ssl.trust-store|ldapkeystore.jks|LDAP jks文件绝对路径|
|security.authentication.ldap.ssl.trust-store-password|password|LDAP jks密码|
|security.authentication.casdoor.user.admin||Casdoor登陆时,系统管理员账号|
|casdoor.endpoint||Casdoor服务器URL|
|casdoor.client-id||Casdoor中的ID|
|casdoor.client-secret||Casdoor中的密钥|
|casdoor.certificate||Casdoor中的证书|
|casdoor.organization-name||Casdoor中的组织名称|
|casdoor.application-name||Casdoor中的应用名称|
|casdoor.redirect-url||dolphinscheduler登录URL|
|api.traffic.control.global.switch|false|流量控制全局开关|
|api.traffic.control.max-global-qps-rate|300|全局最大请求数/秒|
|api.traffic.control.tenant-switch|false|流量控制租户开关|
|api.traffic.control.default-tenant-qps-rate|10|默认租户最大请求数/秒限制|
|api.traffic.control.customize-tenant-qps-rate||自定义租户最大请求数/秒限制|
| 参数 | 默认值 | 描述 |
|-------------------------------------------------------|--------------------------------------|-------------------------------------------------|
| server.port | 12345 | api服务通讯端口 |
| server.servlet.session.timeout | 120m | session超时时间 |
| server.servlet.context-path | /dolphinscheduler/ | 请求路径 |
| spring.servlet.multipart.max-file-size | 1024MB | 最大上传文件大小 |
| spring.servlet.multipart.max-request-size | 1024MB | 最大请求大小 |
| server.jetty.max-http-post-size | 5000000 | jetty服务最大发送请求大小 |
| spring.banner.charset | UTF-8 | 请求编码 |
| spring.jackson.time-zone | UTC | 设置时区 |
| spring.jackson.date-format | "yyyy-MM-dd HH:mm:ss" | 设置时间格式 |
| spring.messages.basename | i18n/messages | i18n配置 |
| security.authentication.type | PASSWORD | 权限校验类型 |
| security.authentication.ldap.user.admin | read-only-admin | LDAP登陆时,系统管理员账号 |
| security.authentication.ldap.urls | ldap://ldap.forumsys.com:389/ | LDAP urls |
| security.authentication.ldap.base.dn | dc=example,dc=com | LDAP base dn |
| security.authentication.ldap.username | cn=read-only-admin,dc=example,dc=com | LDAP账号 |
| security.authentication.ldap.password | password | LDAP密码 |
| security.authentication.ldap.user.identity-attribute | uid | LDAP用户身份标识字段名 |
| security.authentication.ldap.user.email-attribute | mail | LDAP邮箱字段名 |
| security.authentication.ldap.user.not-exist-action | CREATE | 当通过LDAP登陆时用户不存在的操作,默认值是: CREATE,可选值:CREATE、DENY |
| security.authentication.ldap.ssl.enable | false | LDAP ssl开关 |
| security.authentication.ldap.ssl.trust-store | ldapkeystore.jks | LDAP jks文件绝对路径 |
| security.authentication.ldap.ssl.trust-store-password | password | LDAP jks密码 |
| security.authentication.casdoor.user.admin | | Casdoor登陆时,系统管理员账号 |
| casdoor.endpoint | | Casdoor服务器URL |
| casdoor.client-id | | Casdoor中的ID |
| casdoor.client-secret | | Casdoor中的密钥 |
| casdoor.certificate | | Casdoor中的证书 |
| casdoor.organization-name | | Casdoor中的组织名称 |
| casdoor.application-name | | Casdoor中的应用名称 |
| casdoor.redirect-url | | dolphinscheduler登录URL |
| api.traffic.control.global.switch | false | 流量控制全局开关 |
| api.traffic.control.max-global-qps-rate | 300 | 全局最大请求数/秒 |
| api.traffic.control.tenant-switch | false | 流量控制租户开关 |
| api.traffic.control.default-tenant-qps-rate | 10 | 默认租户最大请求数/秒限制 |
| api.traffic.control.customize-tenant-qps-rate | | 自定义租户最大请求数/秒限制 |
## Master Server相关配置
位置:`master-server/conf/application.yaml`
|参数 |默认值| 描述|
|--|--|--|
|master.listen-port|5678|master监听端口|
|master.fetch-command-num|10|master拉取command数量|
|master.pre-exec-threads|10|master准备执行任务的数量,用于限制并行的command|
|master.exec-threads|100|master工作线程数量,用于限制并行的流程实例数量|
|master.dispatch-task-number|3|master每个批次的派发任务数量|
|master.host-selector|lower_weight|master host选择器,用于选择合适的worker执行任务,可选值: random, round_robin, lower_weight|
|master.heartbeat-interval|10|master心跳间隔,单位为秒|
|master.task-commit-retry-times|5|任务重试次数|
|master.task-commit-interval|1000|任务提交间隔,单位为毫秒|
|master.state-wheel-interval|5|轮询检查状态时间|
|master.max-cpu-load-avg|1|master最大cpuload均值,只有高于系统cpuload均值时,master服务才能调度任务. 默认值为1: 会使用100%的CPU|
|master.reserved-memory|0.3|master预留内存,只有低于系统可用内存时,master服务才能调度任务. 默认值为0.3:当系统内存低于30%时会停止调度新的工作流|
|master.failover-interval|10|failover间隔,单位为分钟|
|master.kill-application-when-task-failover|true|当任务实例failover时,是否kill掉yarn或k8s application|
|master.registry-disconnect-strategy.strategy|stop|当Master与注册中心失联之后采取的策略, 默认值是: stop. 可选值包括: stop, waiting|
|master.registry-disconnect-strategy.max-waiting-time|100s|当Master与注册中心失联之后重连时间, 之后当strategy为waiting时,该值生效。 该值表示当Master与注册中心失联时会在给定时间之内进行重连, 在给定时间之内重连失败将会停止自己,在重连时,Master会丢弃目前正在执行的工作流,值为0表示会无限期等待 |
|master.master.worker-group-refresh-interval|10s|定期将workerGroup从数据库中同步到内存的时间间隔|
| 参数 | 默认值 | 描述 |
|--------------------------------------------------------|--------------|-----------------------------------------------------------------------------------|
| master.listen-port | 5678 | master监听端口 |
| master.fetch-command-num | 10 | master拉取command数量 |
| master.pre-exec-threads | 10 | master准备执行任务的数量,用于限制并行的command |
| master.exec-threads | 100 | master工作线程数量,用于限制并行的流程实例数量 |
| master.dispatch-task-number | 3 | master每个批次的派发任务数量 |
| master.host-selector | lower_weight | master host选择器,用于选择合适的worker执行任务,可选值: random, round_robin, lower_weight |
| master.heartbeat-interval | 10 | master心跳间隔,单位为秒 |
| master.task-commit-retry-times | 5 | 任务重试次数 |
| master.task-commit-interval | 1000 | 任务提交间隔,单位为毫秒 |
| master.state-wheel-interval | 5 | 轮询检查状态时间 |
| master.max-cpu-load-avg | 1 | master最大cpuload均值,只有高于系统cpuload均值时,master服务才能调度任务. 默认值为1: 会使用100%的CPU |
| master.reserved-memory | 0.3 | master预留内存,只有低于系统可用内存时,master服务才能调度任务. 默认值为0.3:当系统内存低于30%时会停止调度新的工作流 |
| master.failover-interval | 10 | failover间隔,单位为分钟 |
| master.kill-application-when-task-failover | true | 当任务实例failover时,是否kill掉yarn或k8s application |
| master.registry-disconnect-strategy.strategy | stop | 当Master与注册中心失联之后采取的策略, 默认值是: stop. 可选值包括: stop, waiting |
| master.registry-disconnect-strategy.max-waiting-time | 100s | 当Master与注册中心失联之后重连时间, 之后当strategy为waiting时,该值生效。 该值表示当Master与注册中心失联时会在给定时间之内进行重连, 在给定时间之内重连失败将会停止自己,在重连时,Master会丢弃目前正在执行的工作流,值为0表示会无限期等待 |
| master.worker-group-refresh-interval | 10s | 定期将workerGroup从数据库中同步到内存的时间间隔 |
## Worker Server相关配置
位置:`worker-server/conf/application.yaml`
|参数 |默认值| 描述|
|--|--|--|
|worker.listen-port|1234|worker监听端口|
|worker.exec-threads|100|worker工作线程数量,用于限制并行的任务实例数量|
|worker.heartbeat-interval|10|worker心跳间隔,单位为秒|
|worker.host-weight|100|派发任务时,worker主机的权重|
|worker.tenant-auto-create|true|租户对应于系统的用户,由worker提交作业.如果系统没有该用户,则在参数worker.tenant.auto.create为true后自动创建。|
|worker.max-cpu-load-avg|1|worker最大cpuload均值,只有高于系统cpuload均值时,worker服务才能被派发任务. 默认值为1: 会使用100%的CPU|
|worker.reserved-memory|0.3|worker预留内存,只有低于系统可用内存时,worker服务才能被派发任务. 默认值为0.3:当系统内存低于30%时会停止调度新的工作流|
|worker.alert-listen-host|localhost|alert监听host|
|worker.alert-listen-port|50052|alert监听端口|
|worker.registry-disconnect-strategy.strategy|stop|当Worker与注册中心失联之后采取的策略, 默认值是: stop. 可选值包括: stop, waiting|
|worker.registry-disconnect-strategy.max-waiting-time|100s|当Worker与注册中心失联之后重连时间, 之后当strategy为waiting时,该值生效。 该值表示当Worker与注册中心失联时会在给定时间之内进行重连, 在给定时间之内重连失败将会停止自己,在重连时,Worker会丢弃kill正在执行的任务。值为0表示会无限期等待 |
|worker.task-execute-threads-full-policy|REJECT|如果是 REJECT, 当Worker中等待队列中的任务数达到exec-threads时, Worker将会拒绝接下来新接收的任务,Master将会重新分发该任务; 如果是 CONTINUE, Worker将会接收任务,放入等待队列中等待空闲线程去执行该任务|
| 参数 | 默认值 | 描述 |
|------------------------------------------------------|-----------|-------------------------------------------------------------------------------------------------------------------------------------------|
| worker.listen-port | 1234 | worker监听端口 |
| worker.exec-threads | 100 | worker工作线程数量,用于限制并行的任务实例数量 |
| worker.heartbeat-interval | 10 | worker心跳间隔,单位为秒 |
| worker.host-weight | 100 | 派发任务时,worker主机的权重 |
| worker.tenant-auto-create | true | 租户对应于系统的用户,由worker提交作业.如果系统没有该用户,则在参数worker.tenant.auto.create为true后自动创建。 |
| worker.max-cpu-load-avg | 1 | worker最大cpuload均值,只有高于系统cpuload均值时,worker服务才能被派发任务. 默认值为1: 会使用100%的CPU |
| worker.reserved-memory | 0.3 | worker预留内存,只有低于系统可用内存时,worker服务才能被派发任务. 默认值为0.3:当系统内存低于30%时会停止调度新的工作流 |
| worker.alert-listen-host | localhost | alert监听host |
| worker.alert-listen-port | 50052 | alert监听端口 |
| worker.registry-disconnect-strategy.strategy | stop | 当Worker与注册中心失联之后采取的策略, 默认值是: stop. 可选值包括: stop, waiting |
| worker.registry-disconnect-strategy.max-waiting-time | 100s | 当Worker与注册中心失联之后重连时间, 之后当strategy为waiting时,该值生效。 该值表示当Worker与注册中心失联时会在给定时间之内进行重连, 在给定时间之内重连失败将会停止自己,在重连时,Worker会丢弃kill正在执行的任务。值为0表示会无限期等待 |
| worker.task-execute-threads-full-policy | REJECT | 如果是 REJECT, 当Worker中等待队列中的任务数达到exec-threads时, Worker将会拒绝接下来新接收的任务,Master将会重新分发该任务; 如果是 CONTINUE, Worker将会接收任务,放入等待队列中等待空闲线程去执行该任务 |
| worker.tenant-config.auto-create-tenant-enabled | true | 租户对应于系统的用户,由worker提交作业.如果系统没有该用户,则在参数worker.tenant-config.auto-create-tenant-enabled为true后自动创建。 |
| worker.tenant-config.distributed-tenant-enabled | false | 如果设置为true, auto-create-tenant-enabled 将会不起作用。 |
| worker.tenant-config.default-tenant-enabled | false | 如果设置为true, 将会使用worker服务启动用户作为 `default` 租户。 |
## Alert Server相关配置
位置:`alert-server/conf/application.yaml`
|参数 |默认值| 描述|
|--|--|--|
|server.port|50053|Alert Server监听端口|
|alert.port|50052|alert监听端口|
| 参数 | 默认值 | 描述 |
|-------------|-------|------------------|
| server.port | 50053 | Alert Server监听端口 |
| alert.port | 50052 | alert监听端口 |
## Quartz相关配置
这里面主要是quartz配置,请结合实际业务场景&资源进行配置,本文暂时不做展开,配置文件位置:
|服务名称| 配置文件 |
|--|--|
|Master Server | `master-server/conf/application.yaml`|
|Api Server| `api-server/conf/application.yaml`|
| 服务名称 | 配置文件 |
|---------------|---------------------------------------|
| Master Server | `master-server/conf/application.yaml` |
| Api Server | `api-server/conf/application.yaml` |
默认配置如下:

3
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/OSUtils.java

@ -18,7 +18,6 @@
package org.apache.dolphinscheduler.common.utils;
import org.apache.dolphinscheduler.common.constants.Constants;
import org.apache.dolphinscheduler.common.constants.TenantConstants;
import org.apache.dolphinscheduler.common.shell.ShellExecutor;
import oshi.SystemInfo;
@ -400,8 +399,6 @@ public class OSUtils {
if (!isSudoEnable() || StringUtils.isEmpty(tenantCode)) {
return command;
}
tenantCode = TenantConstants.DEFAULT_TENANT_CODE.equals(tenantCode) ? TenantConstants.BOOTSTRAPT_SYSTEM_USER
: tenantCode;
return String.format("sudo -u %s %s", tenantCode, command);
}

11
dolphinscheduler-standalone-server/src/main/resources/application.yaml

@ -207,15 +207,18 @@ worker:
heartbeat-interval: 10s
# worker host weight to dispatch tasks, default value 100
host-weight: 100
# tenant corresponds to the user of the system, which is used by the worker to submit the job. If system does not have this user, it will be automatically created after the parameter worker.tenant.auto.create is true.
tenant-auto-create: true
#Scenes to be used for distributed users.For example,users created by FreeIpa are stored in LDAP.This parameter only applies to Linux, When this parameter is true, worker.tenant.auto.create has no effect and will not automatically create tenants.
tenant-distributed-user: false
# worker max cpuload avg, only higher than the system cpu load average, worker server can be dispatched tasks. default value 1: will use 100% cpu.
max-cpu-load-avg: 1
# worker reserved memory, only lower than system available memory, worker server can be dispatched tasks. default value 0.1, only the available memory is higher than 10%, worker server can receive task.
reserved-memory: 0.1
task-execute-threads-full-policy: REJECT
tenant-config:
  # A tenant corresponds to a system user, which the worker uses to submit the job.
  # If the system does not have this user, it is created automatically when
  # worker.tenant-config.auto-create-tenant-enabled is true.
  auto-create-tenant-enabled: true
  # For distributed users, e.g. users created by FreeIpa and stored in LDAP.
  # This parameter only applies to Linux. When it is true,
  # worker.tenant-config.auto-create-tenant-enabled has no effect and tenants
  # are not created automatically.
  distributed-tenant-enabled: false
  # If true, the worker bootstrap user is used as the tenant to execute the task
  # when the tenant is `default`.
  default-tenant-enabled: true
alert:
port: 50052

28
dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/TenantConfig.java

@ -0,0 +1,28 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.worker.config;
import lombok.Data;
/**
 * Tenant-related settings of the worker, held by {@code WorkerConfig} as its
 * {@code tenantConfig} property (documented as {@code worker.tenant-config.*}).
 *
 * <p>Getters, setters, {@code equals}/{@code hashCode} and {@code toString} are
 * generated by Lombok's {@link Data} annotation.
 */
@Data
public class TenantConfig {

    /**
     * Documented as {@code auto-create-tenant-enabled}: when the system does not have
     * the tenant's user, it is created automatically. Defaults to {@code true}.
     */
    private boolean autoCreateTenantEnabled = true;

    /**
     * Documented as {@code distributed-tenant-enabled}: when {@code true},
     * {@link #autoCreateTenantEnabled} has no effect and tenants are not created
     * automatically. Defaults to {@code false}.
     */
    private boolean distributedTenantEnabled;

    /**
     * Documented as {@code default-tenant-enabled}: when {@code true}, the worker
     * bootstrap user is used to execute a task whose tenant is {@code default}.
     * Defaults to {@code false}.
     */
    private boolean defaultTenantEnabled;
}

7
dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerConfig.java

@ -45,8 +45,6 @@ public class WorkerConfig implements Validator {
private int execThreads = 10;
private Duration heartbeatInterval = Duration.ofSeconds(10);
private int hostWeight = 100;
private boolean tenantAutoCreate = true;
private boolean tenantDistributedUser = false;
private int maxCpuLoadAvg = -1;
private double reservedMemory = 0.1;
private ConnectStrategyProperties registryDisconnectStrategy = new ConnectStrategyProperties();
@ -59,6 +57,8 @@ public class WorkerConfig implements Validator {
private TaskExecuteThreadsFullPolicy taskExecuteThreadsFullPolicy = TaskExecuteThreadsFullPolicy.REJECT;
private TenantConfig tenantConfig = new TenantConfig();
@Override
public boolean supports(Class<?> clazz) {
return WorkerConfig.class.isAssignableFrom(clazz);
@ -92,8 +92,7 @@ public class WorkerConfig implements Validator {
"\n exec-threads -> " + execThreads +
"\n heartbeat-interval -> " + heartbeatInterval +
"\n host-weight -> " + hostWeight +
"\n tenant-auto-create -> " + tenantAutoCreate +
"\n tenant-distributed-user -> " + tenantDistributedUser +
"\n tenantConfig -> " + tenantConfig +
"\n max-cpu-load-avg -> " + maxCpuLoadAvg +
"\n reserved-memory -> " + reservedMemory +
"\n registry-disconnect-strategy -> " + registryDisconnectStrategy +

11
dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionContextUtils.java

@ -28,6 +28,7 @@ import org.apache.dolphinscheduler.plugin.task.api.model.ResourceInfo;
import org.apache.dolphinscheduler.plugin.task.api.parameters.AbstractParameters;
import org.apache.dolphinscheduler.plugin.task.api.parameters.ParametersNode;
import org.apache.dolphinscheduler.plugin.task.api.resource.ResourceContext;
import org.apache.dolphinscheduler.server.worker.config.TenantConfig;
import org.apache.dolphinscheduler.server.worker.config.WorkerConfig;
import org.apache.dolphinscheduler.server.worker.metrics.WorkerServerMetrics;
@ -47,9 +48,11 @@ public class TaskExecutionContextUtils {
public static String getOrCreateTenant(WorkerConfig workerConfig, TaskExecutionContext taskExecutionContext) {
try {
TenantConfig tenantConfig = workerConfig.getTenantConfig();
String tenantCode = taskExecutionContext.getTenantCode();
if (TenantConstants.DEFAULT_TENANT_CODE.equals(tenantCode)) {
log.info("Current tenant is default tenant, will use {} to execute the task",
if (TenantConstants.DEFAULT_TENANT_CODE.equals(tenantCode) && tenantConfig.isDefaultTenantEnabled()) {
log.info("Current tenant is default tenant, will use bootstrap user: {} to execute the task",
TenantConstants.BOOTSTRAPT_SYSTEM_USER);
return TenantConstants.BOOTSTRAPT_SYSTEM_USER;
}
@ -57,10 +60,10 @@ public class TaskExecutionContextUtils {
// if Using distributed is true and Currently supported systems are linux,Should not let it
// automatically
// create tenants,so TenantAutoCreate has no effect
if (workerConfig.isTenantDistributedUser() && SystemUtils.IS_OS_LINUX) {
if (tenantConfig.isDistributedTenantEnabled() && SystemUtils.IS_OS_LINUX) {
// use the id command to judge in linux
osUserExistFlag = OSUtils.existTenantCodeInLinux(tenantCode);
} else if (OSUtils.isSudoEnable() && workerConfig.isTenantAutoCreate()) {
} else if (OSUtils.isSudoEnable() && tenantConfig.isAutoCreateTenantEnabled()) {
// if not exists this user, then create
OSUtils.createUserIfAbsent(tenantCode);
osUserExistFlag = OSUtils.getUserList().contains(tenantCode);

11
dolphinscheduler-worker/src/main/resources/application.yaml

@ -47,10 +47,6 @@ worker:
heartbeat-interval: 10s
# worker host weight to dispatch tasks, default value 100
host-weight: 100
# tenant corresponds to the user of the system, which is used by the worker to submit the job. If system does not have this user, it will be automatically created after the parameter worker.tenant.auto.create is true.
tenant-auto-create: true
#Scenes to be used for distributed users.For example,users created by FreeIpa are stored in LDAP.This parameter only applies to Linux, When this parameter is true, worker.tenant.auto.create has no effect and will not automatically create tenants.
tenant-distributed-user: false
# worker max cpuload avg, only higher than the system cpu load average, worker server can be dispatched tasks. default value 1: will use 100% cpu.
max-cpu-load-avg: 1
# worker reserved memory, only lower than system available memory, worker server can be dispatched tasks. default value 0.3, only the available memory is higher than 30%, worker server can receive task.
@ -61,6 +57,13 @@ worker:
# The max waiting time to reconnect to registry if you set the strategy to waiting
max-waiting-time: 100s
task-execute-threads-full-policy: REJECT
tenant-config:
# The tenant corresponds to a user of the system, which is used by the worker to submit the job. If the system does not have this user, it will be created automatically when auto-create-tenant-enabled is true.
auto-create-tenant-enabled: true
# Used for distributed-user scenarios. For example, users created by FreeIpa are stored in LDAP. This parameter only applies to Linux. When this parameter is true, auto-create-tenant-enabled has no effect and tenants will not be created automatically.
distributed-tenant-enabled: false
# If set true, will use worker bootstrap user as the tenant to execute task when the tenant is `default`.
default-tenant-enabled: false
server:
port: 1235

Loading…
Cancel
Save