From 6c1e001edfc088d371426955d46d74f9bfbf7576 Mon Sep 17 00:00:00 2001 From: Wenjun Ruan Date: Sat, 6 Jan 2024 18:26:22 +0800 Subject: [PATCH] Add config for defaultTenantEnabled (#15391) --- docs/docs/en/architecture/configuration.md | 76 +++--- docs/docs/zh/architecture/configuration.md | 226 +++++++++--------- .../common/utils/OSUtils.java | 3 - .../src/main/resources/application.yaml | 11 +- .../server/worker/config/TenantConfig.java | 28 +++ .../server/worker/config/WorkerConfig.java | 7 +- .../utils/TaskExecutionContextUtils.java | 11 +- .../src/main/resources/application.yaml | 11 +- 8 files changed, 208 insertions(+), 165 deletions(-) create mode 100644 dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/TenantConfig.java diff --git a/docs/docs/en/architecture/configuration.md b/docs/docs/en/architecture/configuration.md index d8ebfafcdb..01f925055f 100644 --- a/docs/docs/en/architecture/configuration.md +++ b/docs/docs/en/architecture/configuration.md @@ -279,53 +279,55 @@ Location: `api-server/conf/application.yaml` Location: `master-server/conf/application.yaml` -|Parameters | Default value| Description| -|--|--|--| -|master.listen-port|5678|master listen port| -|master.fetch-command-num|10|the number of commands fetched by master| -|master.pre-exec-threads|10|master prepare execute thread number to limit handle commands in parallel| -|master.exec-threads|100|master execute thread number to limit process instances in parallel| -|master.dispatch-task-number|3|master dispatch task number per batch| -|master.host-selector|lower_weight|master host selector to select a suitable worker, default value: LowerWeight. 
Optional values include random, round_robin, lower_weight| -|master.heartbeat-interval|10|master heartbeat interval, the unit is second| -|master.task-commit-retry-times|5|master commit task retry times| -|master.task-commit-interval|1000|master commit task interval, the unit is millisecond| -|master.state-wheel-interval|5|time to check status| -|master.max-cpu-load-avg|1|master max cpuload avg percentage, only higher than the system cpu load average, master server can schedule. default value 1: will use 100% cpu| -|master.reserved-memory|0.3|master reserved memory, only lower than system available memory, master server can schedule. default value 0.3, only the available memory is higher than 30%, master server can schedule.| -|master.failover-interval|10|failover interval, the unit is minute| -|master.kill-application-when-task-failover|true|whether to kill yarn/k8s application when failover taskInstance| -|master.registry-disconnect-strategy.strategy|stop|Used when the master disconnect from registry, default value: stop. 
Optional values include stop, waiting| -|master.registry-disconnect-strategy.max-waiting-time|100s|Used when the master disconnect from registry, and the disconnect strategy is waiting, this config means the master will waiting to reconnect to registry in given times, and after the waiting times, if the master still cannot connect to registry, will stop itself, if the value is 0s, the Master will wait infinitely| -|master.worker-group-refresh-interval|10s|The interval to refresh worker group from db to memory| +| Parameters | Default value | Description | +|------------------------------------------------------|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| master.listen-port | 5678 | master listen port | +| master.fetch-command-num | 10 | the number of commands fetched by master | +| master.pre-exec-threads | 10 | master prepare execute thread number to limit handle commands in parallel | +| master.exec-threads | 100 | master execute thread number to limit process instances in parallel | +| master.dispatch-task-number | 3 | master dispatch task number per batch | +| master.host-selector | lower_weight | master host selector to select a suitable worker, default value: LowerWeight. 
Optional values include random, round_robin, lower_weight | +| master.heartbeat-interval | 10 | master heartbeat interval, the unit is second | +| master.task-commit-retry-times | 5 | master commit task retry times | +| master.task-commit-interval | 1000 | master commit task interval, the unit is millisecond | +| master.state-wheel-interval | 5 | time to check status | +| master.max-cpu-load-avg | 1 | master max cpuload avg percentage, only higher than the system cpu load average, master server can schedule. default value 1: will use 100% cpu | +| master.reserved-memory | 0.3 | master reserved memory, only lower than system available memory, master server can schedule. default value 0.3, only the available memory is higher than 30%, master server can schedule. | +| master.failover-interval | 10 | failover interval, the unit is minute | +| master.kill-application-when-task-failover | true | whether to kill yarn/k8s application when failover taskInstance | +| master.registry-disconnect-strategy.strategy | stop | Used when the master disconnect from registry, default value: stop. 
Optional values include stop, waiting | +| master.registry-disconnect-strategy.max-waiting-time | 100s | Used when the master disconnect from registry, and the disconnect strategy is waiting, this config means the master will waiting to reconnect to registry in given times, and after the waiting times, if the master still cannot connect to registry, will stop itself, if the value is 0s, the Master will wait infinitely | +| master.worker-group-refresh-interval | 10s | The interval to refresh worker group from db to memory | ### Worker Server related configuration Location: `worker-server/conf/application.yaml` -|Parameters | Default value| Description| -|--|--|--| -|worker.listen-port|1234|worker-service listen port| -|worker.exec-threads|100|worker-service execute thread number, used to limit the number of task instances in parallel| -|worker.heartbeat-interval|10|worker-service heartbeat interval, the unit is second| -|worker.host-weight|100|worker host weight to dispatch tasks| -|worker.tenant-auto-create|true|tenant corresponds to the user of the system, which is used by the worker to submit the job. If system does not have this user, it will be automatically created after the parameter worker.tenant.auto.create is true.| -|worker.max-cpu-load-avg|1|worker max cpuload avg, only higher than the system cpu load average, worker server can be dispatched tasks. default value 1: will use 100% cpu.| -|worker.reserved-memory|0.3|worker reserved memory, only lower than system available memory, worker server can be dispatched tasks. default value 0.3, only the available memory is higher than 30%, worker server can receive task.| -|worker.alert-listen-host|localhost|the alert listen host of worker| -|worker.alert-listen-port|50052|the alert listen port of worker| -|worker.registry-disconnect-strategy.strategy|stop|Used when the worker disconnect from registry, default value: stop. 
Optional values include stop, waiting| -|worker.registry-disconnect-strategy.max-waiting-time|100s|Used when the worker disconnect from registry, and the disconnect strategy is waiting, this config means the worker will waiting to reconnect to registry in given times, and after the waiting times, if the worker still cannot connect to registry, will stop itself, if the value is 0s, will wait infinitely | -|worker.task-execute-threads-full-policy|REJECT|If REJECT, when the task waiting in the worker reaches exec-threads, it will reject the received task and the Master will redispatch it; If CONTINUE, it will put the task into the worker's execution queue and wait for a free thread to start execution| +| Parameters | Default value | Description | +|------------------------------------------------------|---------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| worker.listen-port | 1234 | worker-service listen port | +| worker.exec-threads | 100 | worker-service execute thread number, used to limit the number of task instances in parallel | +| worker.heartbeat-interval | 10 | worker-service heartbeat interval, the unit is second | +| worker.host-weight | 100 | worker host weight to dispatch tasks | +| worker.max-cpu-load-avg | 1 | worker max cpuload avg, only higher than the system cpu load average, worker server can be dispatched tasks. default value 1: will use 100% cpu. | +| worker.reserved-memory | 0.3 | worker reserved memory, only lower than system available memory, worker server can be dispatched tasks. default value 0.3, only the available memory is higher than 30%, worker server can receive task. 
| +| worker.alert-listen-host | localhost | the alert listen host of worker | +| worker.alert-listen-port | 50052 | the alert listen port of worker | +| worker.registry-disconnect-strategy.strategy | stop | Used when the worker disconnect from registry, default value: stop. Optional values include stop, waiting | +| worker.registry-disconnect-strategy.max-waiting-time | 100s | Used when the worker disconnect from registry, and the disconnect strategy is waiting, this config means the worker will waiting to reconnect to registry in given times, and after the waiting times, if the worker still cannot connect to registry, will stop itself, if the value is 0s, will wait infinitely | +| worker.task-execute-threads-full-policy | REJECT | If REJECT, when the task waiting in the worker reaches exec-threads, it will reject the received task and the Master will redispatch it; If CONTINUE, it will put the task into the worker's execution queue and wait for a free thread to start execution | +| worker.tenant-config.auto-create-tenant-enabled | true | tenant corresponds to the user of the system, which is used by the worker to submit the job. If the system does not have this user, it will be automatically created when worker.tenant-config.auto-create-tenant-enabled is true. | +| worker.tenant-config.distributed-tenant-enabled | false | When this parameter is true, auto-create-tenant-enabled has no effect and will not automatically create tenants | +| worker.tenant-config.default-tenant-enabled | false | If set true, will use worker bootstrap user as the tenant to execute task when the tenant is `default`. 
| ### Alert Server related configuration Location: `alert-server/conf/application.yaml` -|Parameters | Default value| Description| -|--|--|--| -|server.port|50053|the port of Alert Server| -|alert.port|50052|the port of alert| +| Parameters | Default value | Description | +|-------------|---------------|--------------------------| +| server.port | 50053 | the port of Alert Server | +| alert.port | 50052 | the port of alert | ### Quartz related configuration diff --git a/docs/docs/zh/architecture/configuration.md b/docs/docs/zh/architecture/configuration.md index b058f1f358..719a58c7cd 100644 --- a/docs/docs/zh/architecture/configuration.md +++ b/docs/docs/zh/architecture/configuration.md @@ -200,137 +200,145 @@ common.properties配置文件目前主要是配置hadoop/s3/yarn/applicationId 默认配置如下: -| 参数 | 默认值 | 描述 | -|-----------------------------------------------|--|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| data.basedir.path | /tmp/dolphinscheduler | 本地工作目录,用于存放临时文件 | -| resource.storage.type | NONE | 资源文件存储类型: HDFS,S3,OSS,GCS,ABS,NONE | -| resource.upload.path | /dolphinscheduler | 资源文件存储路径 | -| aws.access.key.id | minioadmin | S3 access key | -| aws.secret.access.key | minioadmin | S3 secret access key | -| aws.region | us-east-1 | S3 区域 | -| aws.s3.endpoint | http://minio:9000 | S3 endpoint地址 | -| hdfs.root.user | hdfs | 如果存储类型为HDFS,需要配置拥有对应操作权限的用户 | -| fs.defaultFS | hdfs://mycluster:8020 | 请求地址如果resource.storage.type=S3,该值类似为: s3a://dolphinscheduler. 
如果resource.storage.type=HDFS, 如果 hadoop 配置了 HA,需要复制core-site.xml 和 hdfs-site.xml 文件到conf目录 | -| hadoop.security.authentication.startup.state | false | hadoop是否开启kerberos权限 | -| java.security.krb5.conf.path | /opt/krb5.conf | kerberos配置目录 | -| login.user.keytab.username | hdfs-mycluster@ESZ.COM | kerberos登录用户 | -| login.user.keytab.path | /opt/hdfs.headless.keytab | kerberos登录用户keytab | -| kerberos.expire.time | 2 | kerberos过期时间,整数,单位为小时 | -| yarn.resourcemanager.ha.rm.ids | 192.168.xx.xx,192.168.xx.xx | yarn resourcemanager 地址, 如果resourcemanager开启了HA, 输入HA的IP地址(以逗号分隔),如果resourcemanager为单节点, 该值为空即可 | -| yarn.application.status.address | http://ds1:8088/ws/v1/cluster/apps/%s | 如果resourcemanager开启了HA或者没有使用resourcemanager,保持默认值即可. 如果resourcemanager为单节点,你需要将ds1 配置为resourcemanager对应的hostname | -| development.state | false | 是否处于开发模式 | -| dolphin.scheduler.network.interface.preferred | NONE | 将会被使用的网卡名称 | -| dolphin.scheduler.network.interface.restrict | NONE | 禁止使用的网卡名称 | -| dolphin.scheduler.network.priority.strategy | default | ip获取策略 default优先获取内网 | -| resource.manager.httpaddress.port | 8088 | resource manager的端口 | +| 参数 | 默认值 | 描述 | +|-----------------------------------------------|--------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| data.basedir.path | /tmp/dolphinscheduler | 本地工作目录,用于存放临时文件 | +| resource.storage.type | NONE | 资源文件存储类型: HDFS,S3,OSS,GCS,ABS,NONE | +| resource.upload.path | /dolphinscheduler | 资源文件存储路径 | +| aws.access.key.id | minioadmin | S3 access key | +| aws.secret.access.key | minioadmin | S3 secret access key | +| aws.region | us-east-1 | S3 区域 | +| aws.s3.endpoint | http://minio:9000 | S3 endpoint地址 | +| hdfs.root.user | hdfs | 如果存储类型为HDFS,需要配置拥有对应操作权限的用户 | +| fs.defaultFS | hdfs://mycluster:8020 | 
请求地址如果resource.storage.type=S3,该值类似为: s3a://dolphinscheduler. 如果resource.storage.type=HDFS, 如果 hadoop 配置了 HA,需要复制core-site.xml 和 hdfs-site.xml 文件到conf目录 | +| hadoop.security.authentication.startup.state | false | hadoop是否开启kerberos权限 | +| java.security.krb5.conf.path | /opt/krb5.conf | kerberos配置目录 | +| login.user.keytab.username | hdfs-mycluster@ESZ.COM | kerberos登录用户 | +| login.user.keytab.path | /opt/hdfs.headless.keytab | kerberos登录用户keytab | +| kerberos.expire.time | 2 | kerberos过期时间,整数,单位为小时 | +| yarn.resourcemanager.ha.rm.ids | 192.168.xx.xx,192.168.xx.xx | yarn resourcemanager 地址, 如果resourcemanager开启了HA, 输入HA的IP地址(以逗号分隔),如果resourcemanager为单节点, 该值为空即可 | +| yarn.application.status.address | http://ds1:8088/ws/v1/cluster/apps/%s | 如果resourcemanager开启了HA或者没有使用resourcemanager,保持默认值即可. 如果resourcemanager为单节点,你需要将ds1 配置为resourcemanager对应的hostname | +| development.state | false | 是否处于开发模式 | +| dolphin.scheduler.network.interface.preferred | NONE | 将会被使用的网卡名称 | +| dolphin.scheduler.network.interface.restrict | NONE | 禁止使用的网卡名称 | +| dolphin.scheduler.network.priority.strategy | default | ip获取策略 default优先获取内网 | +| resource.manager.httpaddress.port | 8088 | resource manager的端口 | | yarn.job.history.status.address | http://ds1:19888/ws/v1/history/mapreduce/jobs/%s | yarn的作业历史状态URL | -| datasource.encryption.enable | false | 是否启用datasource 加密 | -| datasource.encryption.salt | !@#$%^&* | datasource加密使用的salt | -| data-quality.jar.name | dolphinscheduler-data-quality-dev-SNAPSHOT.jar | 配置数据质量使用的jar包 | -| support.hive.oneSession | false | 设置hive SQL是否在同一个session中执行 | -| sudo.enable | true | 是否开启sudo | -| alert.rpc.port | 50052 | Alert Server的RPC端口 | -| zeppelin.rest.url | http://localhost:8080 | zeppelin RESTful API 接口地址 | -| appId.collect | log | 收集applicationId方式, 如果用aop方法,将配置log替换为aop,并将`bin/env/dolphinscheduler_env.sh`自动收集applicationId相关环境变量配置的注释取消掉,注意:aop不支持远程主机提交yarn作业的方式比如Beeline客户端提交,且如果用户环境覆盖了dolphinscheduler_env.sh收集applicationId相关环境变量配置,aop方法会失效 | +| 
datasource.encryption.enable | false | 是否启用datasource 加密 | +| datasource.encryption.salt | !@#$%^&* | datasource加密使用的salt | +| data-quality.jar.name | dolphinscheduler-data-quality-dev-SNAPSHOT.jar | 配置数据质量使用的jar包 | +| support.hive.oneSession | false | 设置hive SQL是否在同一个session中执行 | +| sudo.enable | true | 是否开启sudo | +| alert.rpc.port | 50052 | Alert Server的RPC端口 | +| zeppelin.rest.url | http://localhost:8080 | zeppelin RESTful API 接口地址 | +| appId.collect | log | 收集applicationId方式, 如果用aop方法,将配置log替换为aop,并将`bin/env/dolphinscheduler_env.sh`自动收集applicationId相关环境变量配置的注释取消掉,注意:aop不支持远程主机提交yarn作业的方式比如Beeline客户端提交,且如果用户环境覆盖了dolphinscheduler_env.sh收集applicationId相关环境变量配置,aop方法会失效 | ## Api-server相关配置 位置:`api-server/conf/application.yaml` -|参数 |默认值| 描述| -|--|--|--| -|server.port|12345|api服务通讯端口| -|server.servlet.session.timeout|120m|session超时时间| -|server.servlet.context-path|/dolphinscheduler/ |请求路径| -|spring.servlet.multipart.max-file-size|1024MB|最大上传文件大小| -|spring.servlet.multipart.max-request-size|1024MB|最大请求大小| -|server.jetty.max-http-post-size|5000000|jetty服务最大发送请求大小| -|spring.banner.charset|UTF-8|请求编码| -|spring.jackson.time-zone|UTC|设置时区| -|spring.jackson.date-format|"yyyy-MM-dd HH:mm:ss"|设置时间格式| -|spring.messages.basename|i18n/messages|i18n配置| -|security.authentication.type|PASSWORD|权限校验类型| -|security.authentication.ldap.user.admin|read-only-admin|LDAP登陆时,系统管理员账号| -|security.authentication.ldap.urls|ldap://ldap.forumsys.com:389/|LDAP urls| -|security.authentication.ldap.base.dn|dc=example,dc=com|LDAP base dn| -|security.authentication.ldap.username|cn=read-only-admin,dc=example,dc=com|LDAP账号| -|security.authentication.ldap.password|password|LDAP密码| -|security.authentication.ldap.user.identity-attribute|uid|LDAP用户身份标识字段名| -|security.authentication.ldap.user.email-attribute|mail|LDAP邮箱字段名| -|security.authentication.ldap.user.not-exist-action|CREATE|当通过LDAP登陆时用户不存在的操作,默认值是: CREATE,可选值:CREATE、DENY| -|security.authentication.ldap.ssl.enable|false|LDAP ssl开关| 
-|security.authentication.ldap.ssl.trust-store|ldapkeystore.jks|LDAP jks文件绝对路径| -|security.authentication.ldap.ssl.trust-store-password|password|LDAP jks密码| -|security.authentication.casdoor.user.admin||Casdoor登陆时,系统管理员账号| -|casdoor.endpoint||Casdoor服务器URL| -|casdoor.client-id||Casdoor中的ID| -|casdoor.client-secret||Casdoor中的密钥| -|casdoor.certificate||Casdoor中的证书| -|casdoor.organization-name||Casdoor中的组织名称| -|casdoor.application-name||Casdoor中的应用名称| -|casdoor.redirect-url||dolphinscheduler登录URL| -|api.traffic.control.global.switch|false|流量控制全局开关| -|api.traffic.control.max-global-qps-rate|300|全局最大请求数/秒| -|api.traffic.control.tenant-switch|false|流量控制租户开关| -|api.traffic.control.default-tenant-qps-rate|10|默认租户最大请求数/秒限制| -|api.traffic.control.customize-tenant-qps-rate||自定义租户最大请求数/秒限制| + +| 参数 | 默认值 | 描述 | +|-------------------------------------------------------|--------------------------------------|-------------------------------------------------| +| server.port | 12345 | api服务通讯端口 | +| server.servlet.session.timeout | 120m | session超时时间 | +| server.servlet.context-path | /dolphinscheduler/ | 请求路径 | +| spring.servlet.multipart.max-file-size | 1024MB | 最大上传文件大小 | +| spring.servlet.multipart.max-request-size | 1024MB | 最大请求大小 | +| server.jetty.max-http-post-size | 5000000 | jetty服务最大发送请求大小 | +| spring.banner.charset | UTF-8 | 请求编码 | +| spring.jackson.time-zone | UTC | 设置时区 | +| spring.jackson.date-format | "yyyy-MM-dd HH:mm:ss" | 设置时间格式 | +| spring.messages.basename | i18n/messages | i18n配置 | +| security.authentication.type | PASSWORD | 权限校验类型 | +| security.authentication.ldap.user.admin | read-only-admin | LDAP登陆时,系统管理员账号 | +| security.authentication.ldap.urls | ldap://ldap.forumsys.com:389/ | LDAP urls | +| security.authentication.ldap.base.dn | dc=example,dc=com | LDAP base dn | +| security.authentication.ldap.username | cn=read-only-admin,dc=example,dc=com | LDAP账号 | +| security.authentication.ldap.password | password | LDAP密码 | +| 
security.authentication.ldap.user.identity-attribute | uid | LDAP用户身份标识字段名 | +| security.authentication.ldap.user.email-attribute | mail | LDAP邮箱字段名 | +| security.authentication.ldap.user.not-exist-action | CREATE | 当通过LDAP登陆时用户不存在的操作,默认值是: CREATE,可选值:CREATE、DENY | +| security.authentication.ldap.ssl.enable | false | LDAP ssl开关 | +| security.authentication.ldap.ssl.trust-store | ldapkeystore.jks | LDAP jks文件绝对路径 | +| security.authentication.ldap.ssl.trust-store-password | password | LDAP jks密码 | +| security.authentication.casdoor.user.admin | | Casdoor登陆时,系统管理员账号 | +| casdoor.endpoint | | Casdoor服务器URL | +| casdoor.client-id | | Casdoor中的ID | +| casdoor.client-secret | | Casdoor中的密钥 | +| casdoor.certificate | | Casdoor中的证书 | +| casdoor.organization-name | | Casdoor中的组织名称 | +| casdoor.application-name | | Casdoor中的应用名称 | +| casdoor.redirect-url | | dolphinscheduler登录URL | +| api.traffic.control.global.switch | false | 流量控制全局开关 | +| api.traffic.control.max-global-qps-rate | 300 | 全局最大请求数/秒 | +| api.traffic.control.tenant-switch | false | 流量控制租户开关 | +| api.traffic.control.default-tenant-qps-rate | 10 | 默认租户最大请求数/秒限制 | +| api.traffic.control.customize-tenant-qps-rate | | 自定义租户最大请求数/秒限制 | ## Master Server相关配置 位置:`master-server/conf/application.yaml` -|参数 |默认值| 描述| -|--|--|--| -|master.listen-port|5678|master监听端口| -|master.fetch-command-num|10|master拉取command数量| -|master.pre-exec-threads|10|master准备执行任务的数量,用于限制并行的command| -|master.exec-threads|100|master工作线程数量,用于限制并行的流程实例数量| -|master.dispatch-task-number|3|master每个批次的派发任务数量| -|master.host-selector|lower_weight|master host选择器,用于选择合适的worker执行任务,可选值: random, round_robin, lower_weight| -|master.heartbeat-interval|10|master心跳间隔,单位为秒| -|master.task-commit-retry-times|5|任务重试次数| -|master.task-commit-interval|1000|任务提交间隔,单位为毫秒| -|master.state-wheel-interval|5|轮询检查状态时间| -|master.max-cpu-load-avg|1|master最大cpuload均值,只有高于系统cpuload均值时,master服务才能调度任务. 
默认值为1: 会使用100%的CPU| -|master.reserved-memory|0.3|master预留内存,只有低于系统可用内存时,master服务才能调度任务. 默认值为0.3:当系统内存低于30%时会停止调度新的工作流| -|master.failover-interval|10|failover间隔,单位为分钟| -|master.kill-application-when-task-failover|true|当任务实例failover时,是否kill掉yarn或k8s application| -|master.registry-disconnect-strategy.strategy|stop|当Master与注册中心失联之后采取的策略, 默认值是: stop. 可选值包括: stop, waiting| -|master.registry-disconnect-strategy.max-waiting-time|100s|当Master与注册中心失联之后重连时间, 之后当strategy为waiting时,该值生效。 该值表示当Master与注册中心失联时会在给定时间之内进行重连, 在给定时间之内重连失败将会停止自己,在重连时,Master会丢弃目前正在执行的工作流,值为0表示会无限期等待 | -|master.master.worker-group-refresh-interval|10s|定期将workerGroup从数据库中同步到内存的时间间隔| + +| 参数 | 默认值 | 描述 | +|--------------------------------------------------------|--------------|-----------------------------------------------------------------------------------| +| master.listen-port | 5678 | master监听端口 | +| master.fetch-command-num | 10 | master拉取command数量 | +| master.pre-exec-threads | 10 | master准备执行任务的数量,用于限制并行的command | +| master.exec-threads | 100 | master工作线程数量,用于限制并行的流程实例数量 | +| master.dispatch-task-number | 3 | master每个批次的派发任务数量 | +| master.host-selector | lower_weight | master host选择器,用于选择合适的worker执行任务,可选值: random, round_robin, lower_weight | +| master.heartbeat-interval | 10 | master心跳间隔,单位为秒 | +| master.task-commit-retry-times | 5 | 任务重试次数 | +| master.task-commit-interval | 1000 | 任务提交间隔,单位为毫秒 | +| master.state-wheel-interval | 5 | 轮询检查状态时间 | +| master.max-cpu-load-avg | 1 | master最大cpuload均值,只有高于系统cpuload均值时,master服务才能调度任务. 默认值为1: 会使用100%的CPU | +| master.reserved-memory | 0.3 | master预留内存,只有低于系统可用内存时,master服务才能调度任务. 默认值为0.3:当系统内存低于30%时会停止调度新的工作流 | +| master.failover-interval | 10 | failover间隔,单位为分钟 | +| master.kill-application-when-task-failover | true | 当任务实例failover时,是否kill掉yarn或k8s application | +| master.registry-disconnect-strategy.strategy | stop | 当Master与注册中心失联之后采取的策略, 默认值是: stop. 
可选值包括: stop, waiting | +| master.registry-disconnect-strategy.max-waiting-time | 100s | 当Master与注册中心失联之后重连时间, 之后当strategy为waiting时,该值生效。 该值表示当Master与注册中心失联时会在给定时间之内进行重连, | +| 在给定时间之内重连失败将会停止自己,在重连时,Master会丢弃目前正在执行的工作流,值为0表示会无限期等待 | +| master.worker-group-refresh-interval | 10s | 定期将workerGroup从数据库中同步到内存的时间间隔 | ## Worker Server相关配置 位置:`worker-server/conf/application.yaml` -|参数 |默认值| 描述| -|--|--|--| -|worker.listen-port|1234|worker监听端口| -|worker.exec-threads|100|worker工作线程数量,用于限制并行的任务实例数量| -|worker.heartbeat-interval|10|worker心跳间隔,单位为秒| -|worker.host-weight|100|派发任务时,worker主机的权重| -|worker.tenant-auto-create|true|租户对应于系统的用户,由worker提交作业.如果系统没有该用户,则在参数worker.tenant.auto.create为true后自动创建。| -|worker.max-cpu-load-avg|1|worker最大cpuload均值,只有高于系统cpuload均值时,worker服务才能被派发任务. 默认值为1: 会使用100%的CPU| -|worker.reserved-memory|0.3|worker预留内存,只有低于系统可用内存时,worker服务才能被派发任务. 默认值为0.3:当系统内存低于30%时会停止调度新的工作流| -|worker.alert-listen-host|localhost|alert监听host| -|worker.alert-listen-port|50052|alert监听端口| -|worker.registry-disconnect-strategy.strategy|stop|当Worker与注册中心失联之后采取的策略, 默认值是: stop. 
可选值包括: stop, waiting| -|worker.registry-disconnect-strategy.max-waiting-time|100s|当Worker与注册中心失联之后重连时间, 之后当strategy为waiting时,该值生效。 该值表示当Worker与注册中心失联时会在给定时间之内进行重连, 在给定时间之内重连失败将会停止自己,在重连时,Worker会丢弃kill正在执行的任务。值为0表示会无限期等待 | -|worker.task-execute-threads-full-policy|REJECT|如果是 REJECT, 当Worker中等待队列中的任务数达到exec-threads时, Worker将会拒绝接下来新接收的任务,Master将会重新分发该任务; 如果是 CONTINUE, Worker将会接收任务,放入等待队列中等待空闲线程去执行该任务| + +| 参数 | 默认值 | 描述 | +|------------------------------------------------------|-----------|-------------------------------------------------------------------------------------------------------------------------------------------| +| worker.listen-port | 1234 | worker监听端口 | +| worker.exec-threads | 100 | worker工作线程数量,用于限制并行的任务实例数量 | +| worker.heartbeat-interval | 10 | worker心跳间隔,单位为秒 | +| worker.host-weight | 100 | 派发任务时,worker主机的权重 | +| worker.tenant-auto-create | true | 租户对应于系统的用户,由worker提交作业.如果系统没有该用户,则在参数worker.tenant.auto.create为true后自动创建。 | +| worker.max-cpu-load-avg | 1 | worker最大cpuload均值,只有高于系统cpuload均值时,worker服务才能被派发任务. 默认值为1: 会使用100%的CPU | +| worker.reserved-memory | 0.3 | worker预留内存,只有低于系统可用内存时,worker服务才能被派发任务. 默认值为0.3:当系统内存低于30%时会停止调度新的工作流 | +| worker.alert-listen-host | localhost | alert监听host | +| worker.alert-listen-port | 50052 | alert监听端口 | +| worker.registry-disconnect-strategy.strategy | stop | 当Worker与注册中心失联之后采取的策略, 默认值是: stop. 
可选值包括: stop, waiting | +| worker.registry-disconnect-strategy.max-waiting-time | 100s | 当Worker与注册中心失联之后重连时间, 之后当strategy为waiting时,该值生效。 该值表示当Worker与注册中心失联时会在给定时间之内进行重连, 在给定时间之内重连失败将会停止自己,在重连时,Worker会丢弃kill正在执行的任务。值为0表示会无限期等待 | +| worker.task-execute-threads-full-policy | REJECT | 如果是 REJECT, 当Worker中等待队列中的任务数达到exec-threads时, Worker将会拒绝接下来新接收的任务,Master将会重新分发该任务; 如果是 CONTINUE, Worker将会接收任务,放入等待队列中等待空闲线程去执行该任务 | +| worker.tenant-config.auto-create-tenant-enabled | true | 租户对应于系统的用户,由worker提交作业.如果系统没有该用户,则在参数worker.tenant-config.auto-create-tenant-enabled为true后自动创建。 | +| worker.tenant-config.distributed-tenant-enabled | false | 如果设置为true, auto-create-tenant-enabled 将会不起作用。 | +| worker.tenant-config.default-tenant-enabled | false | 如果设置为true, 将会使用worker服务启动用户作为 `default` 租户。 | ## Alert Server相关配置 位置:`alert-server/conf/application.yaml` -|参数 |默认值| 描述| -|--|--|--| -|server.port|50053|Alert Server监听端口| -|alert.port|50052|alert监听端口| + +| 参数 | 默认值 | 描述 | +|-------------|-------|------------------| +| server.port | 50053 | Alert Server监听端口 | +| alert.port | 50052 | alert监听端口 | ## Quartz相关配置 这里面主要是quartz配置,请结合实际业务场景&资源进行配置,本文暂时不做展开,配置文件位置: -|服务名称| 配置文件 | -|--|--| -|Master Server | `master-server/conf/application.yaml`| -|Api Server| `api-server/conf/application.yaml`| +| 服务名称 | 配置文件 | +|---------------|---------------------------------------| +| Master Server | `master-server/conf/application.yaml` | +| Api Server | `api-server/conf/application.yaml` | 默认配置如下: diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/OSUtils.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/OSUtils.java index a0feb502d8..22563efe46 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/OSUtils.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/OSUtils.java @@ -18,7 +18,6 @@ package org.apache.dolphinscheduler.common.utils; import 
org.apache.dolphinscheduler.common.constants.Constants; -import org.apache.dolphinscheduler.common.constants.TenantConstants; import org.apache.dolphinscheduler.common.shell.ShellExecutor; import oshi.SystemInfo; @@ -400,8 +399,6 @@ public class OSUtils { if (!isSudoEnable() || StringUtils.isEmpty(tenantCode)) { return command; } - tenantCode = TenantConstants.DEFAULT_TENANT_CODE.equals(tenantCode) ? TenantConstants.BOOTSTRAPT_SYSTEM_USER - : tenantCode; return String.format("sudo -u %s %s", tenantCode, command); } diff --git a/dolphinscheduler-standalone-server/src/main/resources/application.yaml b/dolphinscheduler-standalone-server/src/main/resources/application.yaml index d9e25f0131..363943587b 100644 --- a/dolphinscheduler-standalone-server/src/main/resources/application.yaml +++ b/dolphinscheduler-standalone-server/src/main/resources/application.yaml @@ -207,15 +207,18 @@ worker: heartbeat-interval: 10s # worker host weight to dispatch tasks, default value 100 host-weight: 100 - # tenant corresponds to the user of the system, which is used by the worker to submit the job. If system does not have this user, it will be automatically created after the parameter worker.tenant.auto.create is true. - tenant-auto-create: true - #Scenes to be used for distributed users.For example,users created by FreeIpa are stored in LDAP.This parameter only applies to Linux, When this parameter is true, worker.tenant.auto.create has no effect and will not automatically create tenants. - tenant-distributed-user: false # worker max cpuload avg, only higher than the system cpu load average, worker server can be dispatched tasks. default value 1: will use 100% cpu. max-cpu-load-avg: 1 # worker reserved memory, only lower than system available memory, worker server can be dispatched tasks. default value 0.1, only the available memory is higher than 10%, worker server can receive task. 
reserved-memory: 0.1 task-execute-threads-full-policy: REJECT + tenant-config: + # tenant corresponds to the user of the system, which is used by the worker to submit the job. If system does not have this user, it will be automatically created after the parameter worker.tenant-config.auto-create-tenant-enabled is true. + auto-create-tenant-enabled: true + # Scenes to be used for distributed users. For example, users created by FreeIpa are stored in LDAP. This parameter only applies to Linux, When this parameter is true, worker.tenant-config.auto-create-tenant-enabled has no effect and will not automatically create tenants. + distributed-tenant-enabled: false + # If set true, will use worker bootstrap user as the tenant to execute task when the tenant is `default`; + default-tenant-enabled: true alert: port: 50052 diff --git a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/TenantConfig.java b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/TenantConfig.java new file mode 100644 index 0000000000..9d2753f88d --- /dev/null +++ b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/TenantConfig.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.dolphinscheduler.server.worker.config; + +import lombok.Data; + +@Data +public class TenantConfig { + + private boolean autoCreateTenantEnabled = true; + private boolean distributedTenantEnabled = false; + private boolean defaultTenantEnabled = false; +} diff --git a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerConfig.java b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerConfig.java index f1b821901f..db85d50244 100644 --- a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerConfig.java +++ b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerConfig.java @@ -45,8 +45,6 @@ public class WorkerConfig implements Validator { private int execThreads = 10; private Duration heartbeatInterval = Duration.ofSeconds(10); private int hostWeight = 100; - private boolean tenantAutoCreate = true; - private boolean tenantDistributedUser = false; private int maxCpuLoadAvg = -1; private double reservedMemory = 0.1; private ConnectStrategyProperties registryDisconnectStrategy = new ConnectStrategyProperties(); @@ -59,6 +57,8 @@ public class WorkerConfig implements Validator { private TaskExecuteThreadsFullPolicy taskExecuteThreadsFullPolicy = TaskExecuteThreadsFullPolicy.REJECT; + private TenantConfig tenantConfig = new TenantConfig(); + @Override public boolean supports(Class clazz) { return WorkerConfig.class.isAssignableFrom(clazz); @@ -92,8 +92,7 @@ public class WorkerConfig implements Validator { "\n exec-threads -> " + execThreads + "\n heartbeat-interval -> " + heartbeatInterval + "\n host-weight -> " + hostWeight + - "\n tenant-auto-create -> " + tenantAutoCreate + - "\n tenant-distributed-user -> " + tenantDistributedUser + + "\n tenantConfig -> " + tenantConfig + "\n max-cpu-load-avg -> " + maxCpuLoadAvg + "\n reserved-memory -> " + reservedMemory + "\n 
registry-disconnect-strategy -> " + registryDisconnectStrategy + diff --git a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionContextUtils.java b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionContextUtils.java index dcfeec4e1c..42d2894f8a 100644 --- a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionContextUtils.java +++ b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionContextUtils.java @@ -28,6 +28,7 @@ import org.apache.dolphinscheduler.plugin.task.api.model.ResourceInfo; import org.apache.dolphinscheduler.plugin.task.api.parameters.AbstractParameters; import org.apache.dolphinscheduler.plugin.task.api.parameters.ParametersNode; import org.apache.dolphinscheduler.plugin.task.api.resource.ResourceContext; +import org.apache.dolphinscheduler.server.worker.config.TenantConfig; import org.apache.dolphinscheduler.server.worker.config.WorkerConfig; import org.apache.dolphinscheduler.server.worker.metrics.WorkerServerMetrics; @@ -47,9 +48,11 @@ public class TaskExecutionContextUtils { public static String getOrCreateTenant(WorkerConfig workerConfig, TaskExecutionContext taskExecutionContext) { try { + TenantConfig tenantConfig = workerConfig.getTenantConfig(); + String tenantCode = taskExecutionContext.getTenantCode(); - if (TenantConstants.DEFAULT_TENANT_CODE.equals(tenantCode)) { - log.info("Current tenant is default tenant, will use {} to execute the task", + if (TenantConstants.DEFAULT_TENANT_CODE.equals(tenantCode) && tenantConfig.isDefaultTenantEnabled()) { + log.info("Current tenant is default tenant, will use bootstrap user: {} to execute the task", TenantConstants.BOOTSTRAPT_SYSTEM_USER); return TenantConstants.BOOTSTRAPT_SYSTEM_USER; } @@ -57,10 +60,10 @@ public class TaskExecutionContextUtils { // if Using distributed is true and Currently supported systems 
are linux,Should not let it // automatically // create tenants,so TenantAutoCreate has no effect - if (workerConfig.isTenantDistributedUser() && SystemUtils.IS_OS_LINUX) { + if (tenantConfig.isDistributedTenantEnabled() && SystemUtils.IS_OS_LINUX) { // use the id command to judge in linux osUserExistFlag = OSUtils.existTenantCodeInLinux(tenantCode); - } else if (OSUtils.isSudoEnable() && workerConfig.isTenantAutoCreate()) { + } else if (OSUtils.isSudoEnable() && tenantConfig.isAutoCreateTenantEnabled()) { // if not exists this user, then create OSUtils.createUserIfAbsent(tenantCode); osUserExistFlag = OSUtils.getUserList().contains(tenantCode); diff --git a/dolphinscheduler-worker/src/main/resources/application.yaml b/dolphinscheduler-worker/src/main/resources/application.yaml index 5cab5867c7..50a3f19917 100644 --- a/dolphinscheduler-worker/src/main/resources/application.yaml +++ b/dolphinscheduler-worker/src/main/resources/application.yaml @@ -47,10 +47,6 @@ worker: heartbeat-interval: 10s # worker host weight to dispatch tasks, default value 100 host-weight: 100 - # tenant corresponds to the user of the system, which is used by the worker to submit the job. If system does not have this user, it will be automatically created after the parameter worker.tenant.auto.create is true. - tenant-auto-create: true - #Scenes to be used for distributed users.For example,users created by FreeIpa are stored in LDAP.This parameter only applies to Linux, When this parameter is true, worker.tenant.auto.create has no effect and will not automatically create tenants. - tenant-distributed-user: false # worker max cpuload avg, only higher than the system cpu load average, worker server can be dispatched tasks. default value 1: will use 100% cpu. max-cpu-load-avg: 1 # worker reserved memory, only lower than system available memory, worker server can be dispatched tasks. default value 0.3, only the available memory is higher than 30%, worker server can receive task. 
@@ -61,6 +57,13 @@ worker: # The max waiting time to reconnect to registry if you set the strategy to waiting max-waiting-time: 100s task-execute-threads-full-policy: REJECT + tenant-config: + # tenant corresponds to the user of the system, which is used by the worker to submit the job. If the system does not have this user, it will be automatically created when worker.tenant-config.auto-create-tenant-enabled is true. + auto-create-tenant-enabled: true + # Intended for distributed users. For example, users created by FreeIpa are stored in LDAP. This parameter only applies to Linux. When this parameter is true, auto-create-tenant-enabled has no effect and tenants will not be created automatically. + distributed-tenant-enabled: false + # If set true, will use worker bootstrap user as the tenant to execute task when the tenant is `default`. + default-tenant-enabled: false server: port: 1235