Browse Source

[DSIP-54][Master] Use ClusterManager to manage the cluster in master (#16263)

dev
Wenjun Ruan 5 months ago committed by GitHub
parent
commit
8f6df8d840
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 48
      docs/docs/en/architecture/configuration.md
  2. 48
      docs/docs/zh/architecture/configuration.md
  3. 2
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/WorkerHeartBeat.java
  4. 118
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/MapComparator.java
  5. 170
      dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/MapComparatorTest.java
  6. 7
      dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/AlertDao.java
  7. 9
      dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/entity/WorkerGroup.java
  8. 11
      dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/repository/WorkerGroupDao.java
  9. 36
      dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/repository/impl/WorkerGroupDaoImpl.java
  10. 15
      dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/utils/WorkerGroupUtils.java
  11. 5
      dolphinscheduler-dao/src/test/java/org/apache/dolphinscheduler/dao/repository/impl/AlertDaoTest.java
  12. 15
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/MasterServer.java
  13. 69
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/AbstractClusterSubscribeListener.java
  14. 33
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/BaseServerMetadata.java
  15. 57
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/ClusterManager.java
  16. 60
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/ClusterStateMonitors.java
  17. 79
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/IClusters.java
  18. 25
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/IMasterSlotReBalancer.java
  19. 99
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/MasterClusters.java
  20. 46
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/MasterServerMetadata.java
  21. 101
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/MasterSlotManager.java
  22. 143
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/WorkerClusters.java
  23. 116
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/WorkerGroupChangeNotifier.java
  24. 52
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/WorkerServerMetadata.java
  25. 115
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/DynamicWeightedRoundRobinWorkerLoadBalancer.java
  26. 109
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/FixedWeightedRoundRobinWorkerLoadBalancer.java
  27. 22
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/IWorkerLoadBalancer.java
  28. 55
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/RandomWorkerLoadBalancer.java
  29. 56
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/RoundRobinWorkerLoadBalancer.java
  30. 45
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/WeightedServer.java
  31. 50
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/WorkerLoadBalancerConfiguration.java
  32. 61
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/WorkerLoadBalancerConfigurationProperties.java
  33. 9
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/WorkerLoadBalancerType.java
  34. 2
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/command/CommandFetcherConfiguration.java
  35. 12
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/command/IdSlotBasedCommandFetcher.java
  36. 21
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterConfig.java
  37. 47
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/exceptions/ExecuteException.java
  38. 69
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/CommonHostManager.java
  39. 64
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/HostManagerConfig.java
  40. 165
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/LowerWeightHostManager.java
  41. 47
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/RandomHostManager.java
  42. 47
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/RoundRobinHostManager.java
  43. 47
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/AbstractSelector.java
  44. 69
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/HostWeight.java
  45. 77
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/HostWorker.java
  46. 54
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/LowerWeightRoundRobin.java
  47. 66
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/RandomSelector.java
  48. 148
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/RoundRobinSelector.java
  49. 4
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/exception/dispatch/TaskDispatchException.java
  50. 4
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/exception/dispatch/WorkerGroupNotFoundException.java
  51. 114
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterSlotManager.java
  52. 374
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/ServerNodeManager.java
  53. 38
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/WorkerInfoChangeListener.java
  54. 102
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/BaseTaskDispatcher.java
  55. 42
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/GlobalTaskDispatchWaitingQueueLooper.java
  56. 28
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteContextFactory.java
  57. 56
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/dispatcher/BaseTaskDispatcher.java
  58. 10
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/dispatcher/MasterTaskDispatcher.java
  59. 4
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/dispatcher/TaskDispatcher.java
  60. 35
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/dispatcher/WorkerTaskDispatcher.java
  61. 21
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/utils/MasterThreadFactory.java
  62. 12
      dolphinscheduler-master/src/main/resources/application.yaml
  63. 106
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/MasterClustersTest.java
  64. 50
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/MasterServerMetadataTest.java
  65. 106
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/MasterSlotManagerTest.java
  66. 167
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/WorkerClustersTest.java
  67. 158
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/WorkerGroupChangeNotifierTest.java
  68. 53
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/WorkerServerMetadataTest.java
  69. 82
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/BaseWorkerLoadBalancerTest.java
  70. 119
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/DynamicWeightedRoundRobinWorkerLoadBalancerTest.java
  71. 118
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/FixedWeightedRoundRobinWorkerLoadBalancerTest.java
  72. 67
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/RandomWorkerLoadBalancerTest.java
  73. 69
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/RoundRobinWorkerLoadBalancerTest.java
  74. 23
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/config/MasterConfigTest.java
  75. 43
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/HostWorkerTest.java
  76. 97
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/LowerWeightRoundRobinTest.java
  77. 53
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/RandomSelectorTest.java
  78. 106
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/RoundRobinSelectorTest.java
  79. 94
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/registry/MasterSlotManagerTest.java
  80. 93
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/registry/ServerNodeManagerTest.java
  81. 3
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/runner/dispatcher/MasterTaskDispatcherTest.java
  82. 16
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/runner/dispatcher/WorkerTaskDispatcherTest.java
  83. 59
      dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/utils/MasterThreadFactoryTest.java
  84. 10
      dolphinscheduler-master/src/test/resources/application.yaml
  85. 12
      dolphinscheduler-registry/dolphinscheduler-registry-api/src/main/java/org/apache/dolphinscheduler/registry/api/RegistryClient.java
  86. 120
      dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/queue/MasterPriorityQueue.java
  87. 101
      dolphinscheduler-service/src/test/java/org/apache/dolphinscheduler/service/queue/MasterPriorityQueueTest.java
  88. 12
      dolphinscheduler-standalone-server/src/main/resources/application.yaml
  89. 4
      dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/WorkerTaskExecutorThreadPool.java
  90. 3
      dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/task/WorkerHeartBeatTask.java

48
docs/docs/en/architecture/configuration.md

@ -284,30 +284,30 @@ Location: `api-server/conf/application.yaml`
Location: `master-server/conf/application.yaml`
| Parameters | Default value | Description |
|-----------------------------------------------------------------------------|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| master.listen-port | 5678 | master listen port |
| master.pre-exec-threads | 10 | master prepare execute thread number to limit handle commands in parallel |
| master.exec-threads | 100 | master execute thread number to limit process instances in parallel |
| master.dispatch-task-number | 3 | master dispatch task number per batch |
| master.host-selector | lower_weight | master host selector to select a suitable worker, default value: LowerWeight. Optional values include random, round_robin, lower_weight |
| master.max-heartbeat-interval | 10s | master max heartbeat interval |
| master.task-commit-retry-times | 5 | master commit task retry times |
| master.task-commit-interval | 1000 | master commit task interval, the unit is millisecond |
| master.state-wheel-interval | 5 | time to check status |
| master.server-load-protection.enabled | true | If set true, will open master overload protection |
| master.server-load-protection.max-system-cpu-usage-percentage-thresholds | 0.7 | Master max system cpu usage, when the master's system cpu usage is smaller then this value, master server can execute workflow. |
| master.server-load-protection.max-jvm-cpu-usage-percentage-thresholds | 0.7 | Master max JVM cpu usage, when the master's jvm cpu usage is smaller then this value, master server can execute workflow. |
| master.server-load-protection.max-system-memory-usage-percentage-thresholds | 0.7 | Master max system memory usage , when the master's system memory usage is smaller then this value, master server can execute workflow. |
| master.server-load-protection.max-disk-usage-percentage-thresholds | 0.7 | Master max disk usage , when the master's disk usage is smaller then this value, master server can execute workflow. |
| master.failover-interval | 10 | failover interval, the unit is minute |
| master.kill-application-when-task-failover | true | whether to kill yarn/k8s application when failover taskInstance |
| master.registry-disconnect-strategy.strategy | stop | Used when the master disconnect from registry, default value: stop. Optional values include stop, waiting |
| master.registry-disconnect-strategy.max-waiting-time | 100s | Used when the master disconnect from registry, and the disconnect strategy is waiting, this config means the master will waiting to reconnect to registry in given times, and after the waiting times, if the master still cannot connect to registry, will stop itself, if the value is 0s, the Master will wait infinitely |
| master.worker-group-refresh-interval | 10s | The interval to refresh worker group from db to memory |
| master.command-fetch-strategy.type | ID_SLOT_BASED | The command fetch strategy, only support `ID_SLOT_BASED` |
| master.command-fetch-strategy.config.id-step | 1 | The id auto incremental step of t_ds_command in db |
| master.command-fetch-strategy.config.fetch-size | 10 | The number of commands fetched by master |
| Parameters | Default value | Description |
|-----------------------------------------------------------------------------|------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| master.listen-port | 5678 | master listen port |
| master.pre-exec-threads | 10 | master prepare execute thread number to limit handle commands in parallel |
| master.exec-threads | 100 | master execute thread number to limit process instances in parallel |
| master.dispatch-task-number | 3 | master dispatch task number per batch |
| master.worker-load-balancer-configuration-properties.type | DYNAMIC_WEIGHTED_ROUND_ROBIN | Master will use the worker's cpu/memory/threadPool usage to calculate the worker load, the lower load will have more change to be dispatched task |
| master.max-heartbeat-interval | 10s | master max heartbeat interval |
| master.task-commit-retry-times | 5 | master commit task retry times |
| master.task-commit-interval | 1000 | master commit task interval, the unit is millisecond |
| master.state-wheel-interval | 5 | time to check status |
| master.server-load-protection.enabled | true | If set true, will open master overload protection |
| master.server-load-protection.max-system-cpu-usage-percentage-thresholds | 0.7 | Master max system cpu usage, when the master's system cpu usage is smaller then this value, master server can execute workflow. |
| master.server-load-protection.max-jvm-cpu-usage-percentage-thresholds | 0.7 | Master max JVM cpu usage, when the master's jvm cpu usage is smaller then this value, master server can execute workflow. |
| master.server-load-protection.max-system-memory-usage-percentage-thresholds | 0.7 | Master max system memory usage , when the master's system memory usage is smaller then this value, master server can execute workflow. |
| master.server-load-protection.max-disk-usage-percentage-thresholds | 0.7 | Master max disk usage , when the master's disk usage is smaller then this value, master server can execute workflow. |
| master.failover-interval | 10 | failover interval, the unit is minute |
| master.kill-application-when-task-failover | true | whether to kill yarn/k8s application when failover taskInstance |
| master.registry-disconnect-strategy.strategy | stop | Used when the master disconnect from registry, default value: stop. Optional values include stop, waiting |
| master.registry-disconnect-strategy.max-waiting-time | 100s | Used when the master disconnect from registry, and the disconnect strategy is waiting, this config means the master will waiting to reconnect to registry in given times, and after the waiting times, if the master still cannot connect to registry, will stop itself, if the value is 0s, the Master will wait infinitely |
| master.worker-group-refresh-interval | 10s | The interval to refresh worker group from db to memory |
| master.command-fetch-strategy.type | ID_SLOT_BASED | The command fetch strategy, only support `ID_SLOT_BASED` |
| master.command-fetch-strategy.config.id-step | 1 | The id auto incremental step of t_ds_command in db |
| master.command-fetch-strategy.config.fetch-size | 10 | The number of commands fetched by master |
### Worker Server related configuration

48
docs/docs/zh/architecture/configuration.md

@ -284,30 +284,30 @@ common.properties配置文件目前主要是配置hadoop/s3/yarn/applicationId
位置:`master-server/conf/application.yaml`
| 参数 | 默认值 | 描述 |
|-----------------------------------------------------------------------------|---------------|------------------------------------------------------------------------------------------------------------------------------------------|
| master.listen-port | 5678 | master监听端口 |
| master.pre-exec-threads | 10 | master准备执行任务的数量,用于限制并行的command |
| master.exec-threads | 100 | master工作线程数量,用于限制并行的流程实例数量 |
| master.dispatch-task-number | 3 | master每个批次的派发任务数量 |
| master.host-selector | lower_weight | master host选择器,用于选择合适的worker执行任务,可选值: random, round_robin, lower_weight |
| master.max-heartbeat-interval | 10s | master最大心跳间隔 |
| master.task-commit-retry-times | 5 | 任务重试次数 |
| master.task-commit-interval | 1000 | 任务提交间隔,单位为毫秒 |
| master.state-wheel-interval | 5 | 轮询检查状态时间 |
| master.server-load-protection.enabled | true | 是否开启系统保护策略 |
| master.server-load-protection.max-system-cpu-usage-percentage-thresholds | 0.7 | master最大系统cpu使用值,只有当前系统cpu使用值低于最大系统cpu使用值,master服务才能调度任务. 默认值为0.7: 会使用70%的操作系统CPU |
| master.server-load-protection.max-jvm-cpu-usage-percentage-thresholds | 0.7 | master最大JVM cpu使用值,只有当前JVM cpu使用值低于最大JVM cpu使用值,master服务才能调度任务. 默认值为0.7: 会使用70%的JVM CPU |
| master.server-load-protection.max-system-memory-usage-percentage-thresholds | 0.7 | master最大系统 内存使用值,只有当前系统内存使用值低于最大系统内存使用值,master服务才能调度任务. 默认值为0.7: 会使用70%的操作系统内存 |
| master.server-load-protection.max-disk-usage-percentage-thresholds | 0.7 | master最大系统磁盘使用值,只有当前系统磁盘使用值低于最大系统磁盘使用值,master服务才能调度任务. 默认值为0.7: 会使用70%的操作系统磁盘空间 |
| master.failover-interval | 10 | failover间隔,单位为分钟 |
| master.kill-application-when-task-failover | true | 当任务实例failover时,是否kill掉yarn或k8s application |
| master.registry-disconnect-strategy.strategy | stop | 当Master与注册中心失联之后采取的策略, 默认值是: stop. 可选值包括: stop, waiting |
| master.registry-disconnect-strategy.max-waiting-time | 100s | 当Master与注册中心失联之后重连时间, 之后当strategy为waiting时,该值生效。 该值表示当Master与注册中心失联时会在给定时间之内进行重连, 在给定时间之内重连失败将会停止自己,在重连时,Master会丢弃目前正在执行的工作流,值为0表示会无限期等待 |
| master.master.worker-group-refresh-interval | 10s | 定期将workerGroup从数据库中同步到内存的时间间隔 |
| master.command-fetch-strategy.type | ID_SLOT_BASED | Command拉取策略, 目前仅支持 `ID_SLOT_BASED` |
| master.command-fetch-strategy.config.id-step | 1 | 数据库中t_ds_command的id自增步长 |
| master.command-fetch-strategy.config.fetch-size | 10 | master拉取command数量 |
| 参数 | 默认值 | 描述 |
|-----------------------------------------------------------------------------|------------------------------|------------------------------------------------------------------------------------------------------------------------------------------|
| master.listen-port | 5678 | master监听端口 |
| master.pre-exec-threads | 10 | master准备执行任务的数量,用于限制并行的command |
| master.exec-threads | 100 | master工作线程数量,用于限制并行的流程实例数量 |
| master.dispatch-task-number | 3 | master每个批次的派发任务数量 |
| master.worker-load-balancer-configuration-properties.type | DYNAMIC_WEIGHTED_ROUND_ROBIN | Master 将会使用Worker的动态CPU/Memory/线程池使用率来计算Worker的负载,负载越低的worker将会有更高的机会被分发任务 |
| master.max-heartbeat-interval | 10s | master最大心跳间隔 |
| master.task-commit-retry-times | 5 | 任务重试次数 |
| master.task-commit-interval | 1000 | 任务提交间隔,单位为毫秒 |
| master.state-wheel-interval | 5 | 轮询检查状态时间 |
| master.server-load-protection.enabled | true | 是否开启系统保护策略 |
| master.server-load-protection.max-system-cpu-usage-percentage-thresholds | 0.7 | master最大系统cpu使用值,只有当前系统cpu使用值低于最大系统cpu使用值,master服务才能调度任务. 默认值为0.7: 会使用70%的操作系统CPU |
| master.server-load-protection.max-jvm-cpu-usage-percentage-thresholds | 0.7 | master最大JVM cpu使用值,只有当前JVM cpu使用值低于最大JVM cpu使用值,master服务才能调度任务. 默认值为0.7: 会使用70%的JVM CPU |
| master.server-load-protection.max-system-memory-usage-percentage-thresholds | 0.7 | master最大系统 内存使用值,只有当前系统内存使用值低于最大系统内存使用值,master服务才能调度任务. 默认值为0.7: 会使用70%的操作系统内存 |
| master.server-load-protection.max-disk-usage-percentage-thresholds | 0.7 | master最大系统磁盘使用值,只有当前系统磁盘使用值低于最大系统磁盘使用值,master服务才能调度任务. 默认值为0.7: 会使用70%的操作系统磁盘空间 |
| master.failover-interval | 10 | failover间隔,单位为分钟 |
| master.kill-application-when-task-failover | true | 当任务实例failover时,是否kill掉yarn或k8s application |
| master.registry-disconnect-strategy.strategy | stop | 当Master与注册中心失联之后采取的策略, 默认值是: stop. 可选值包括: stop, waiting |
| master.registry-disconnect-strategy.max-waiting-time | 100s | 当Master与注册中心失联之后重连时间, 之后当strategy为waiting时,该值生效。 该值表示当Master与注册中心失联时会在给定时间之内进行重连, 在给定时间之内重连失败将会停止自己,在重连时,Master会丢弃目前正在执行的工作流,值为0表示会无限期等待 |
| master.master.worker-group-refresh-interval | 10s | 定期将workerGroup从数据库中同步到内存的时间间隔 |
| master.command-fetch-strategy.type | ID_SLOT_BASED | Command拉取策略, 目前仅支持 `ID_SLOT_BASED` |
| master.command-fetch-strategy.config.id-step | 1 | 数据库中t_ds_command的id自增步长 |
| master.command-fetch-strategy.config.fetch-size | 10 | master拉取command数量 |
## Worker Server相关配置

2
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/WorkerHeartBeat.java

@ -29,6 +29,6 @@ import lombok.experimental.SuperBuilder;
public class WorkerHeartBeat extends BaseHeartBeat implements HeartBeat {
private int workerHostWeight; // worker host weight
private int threadPoolUsage; // worker waiting task count
private double threadPoolUsage; // worker waiting task count
}

118
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/MapComparator.java

@ -0,0 +1,118 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.common.utils;
import org.apache.commons.collections4.MapUtils;
import org.apache.commons.collections4.SetUtils;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.Getter;
import lombok.ToString;
@Getter
@ToString
public class MapComparator<K, V> {
private final Map<K, V> oldMap;
private final Map<K, V> newMap;
public MapComparator(Map<K, V> oldMap, Map<K, V> newMap) {
this.oldMap = oldMap;
this.newMap = newMap;
}
/**
* Get keys that are in the new map but not in the old map
*/
public Set<K> getKeysToAdd() {
if (MapUtils.isEmpty(newMap)) {
return SetUtils.emptySet();
}
if (MapUtils.isEmpty(oldMap)) {
return new HashSet<>(newMap.keySet());
}
Set<K> keysToAdd = new HashSet<>(newMap.keySet());
keysToAdd.removeAll(oldMap.keySet());
return keysToAdd;
}
/**
* Get values which key in the new map but not in the old map
*/
public List<V> getValuesToAdd() {
if (MapUtils.isEmpty(newMap)) {
return Collections.emptyList();
}
return getKeysToAdd().stream().map(newMap::get).collect(Collectors.toList());
}
/**
* Get keys which in the old map but not in the new map
*/
public Set<K> getKeysToRemove() {
if (MapUtils.isEmpty(oldMap)) {
return SetUtils.emptySet();
}
if (MapUtils.isEmpty(newMap)) {
return new HashSet<>(oldMap.keySet());
}
Set<K> keysToRemove = new HashSet<>(oldMap.keySet());
keysToRemove.removeAll(newMap.keySet());
return keysToRemove;
}
/**
* Get values which key in the old map but not in the new map
*/
public List<V> getValuesToRemove() {
if (MapUtils.isEmpty(oldMap)) {
return Collections.emptyList();
}
return getKeysToRemove().stream().map(oldMap::get).collect(Collectors.toList());
}
/**
* Get keys which in both the old map and the new map, but the value is different
*/
public Set<K> getKeysToUpdate() {
if (MapUtils.isEmpty(oldMap) || MapUtils.isEmpty(newMap)) {
return SetUtils.emptySet();
}
Set<K> keysToUpdate = new HashSet<>(newMap.keySet());
keysToUpdate.retainAll(oldMap.keySet());
keysToUpdate.removeIf(key -> newMap.get(key).equals(oldMap.get(key)));
return keysToUpdate;
}
/**
* Get new values which key in both the old map and the new map, but the value is different
*/
public List<V> getNewValuesToUpdate() {
if (MapUtils.isEmpty(oldMap) || MapUtils.isEmpty(newMap)) {
return Collections.emptyList();
}
return getKeysToUpdate().stream().map(newMap::get).collect(Collectors.toList());
}
}

170
dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/MapComparatorTest.java

@ -0,0 +1,170 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.common.utils;
import static com.google.common.truth.Truth.assertThat;
import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Test;
class MapComparatorTest {
@Test
void getKeysToAdd() {
MapComparator<String, String> mapComparator = getMapComparator();
assertThat(mapComparator.getKeysToAdd()).containsExactly("key5");
}
@Test
void getKeysToAdd_newMapIsNull() {
MapComparator<String, String> mapComparator = getMapComparatorWithNewMapIsNull();
assertThat(mapComparator.getKeysToAdd()).isEmpty();
}
@Test
void getKeysToAdd_oldMapIsNull() {
MapComparator<String, String> mapComparator = getMapComparatorWithOldMapIsNull();
assertThat(mapComparator.getKeysToAdd()).containsExactly("key1", "key2", "key3", "key5");
}
@Test
void getValuesToAdd() {
MapComparator<String, String> mapComparator = getMapComparator();
assertThat(mapComparator.getValuesToAdd()).containsExactly("map2_value5");
}
@Test
void getValuesToAdd_newMapIsNull() {
MapComparator<String, String> mapComparator = getMapComparatorWithNewMapIsNull();
assertThat(mapComparator.getValuesToAdd()).isEmpty();
}
@Test
void getValuesToAdd_oldMapIsNull() {
MapComparator<String, String> mapComparator = getMapComparatorWithOldMapIsNull();
assertThat(mapComparator.getValuesToAdd())
.containsExactly("map2_value1", "map2_value2", "map2_value3", "map2_value5");
}
@Test
void getKeysToRemove() {
MapComparator<String, String> mapComparator = getMapComparator();
assertThat(mapComparator.getKeysToRemove()).containsExactly("key4");
}
@Test
void getKeysToRemove_newMapIsNull() {
MapComparator<String, String> mapComparator = getMapComparatorWithNewMapIsNull();
assertThat(mapComparator.getKeysToRemove()).containsExactly("key1", "key2", "key3", "key4");
}
@Test
void getKeysToRemove_oldMapIsNull() {
MapComparator<String, String> mapComparator = getMapComparatorWithOldMapIsNull();
assertThat(mapComparator.getKeysToRemove()).isEmpty();
}
@Test
void getValuesToRemove() {
MapComparator<String, String> mapComparator = getMapComparator();
assertThat(mapComparator.getValuesToRemove()).containsExactly("map1_value4");
}
@Test
void getValuesToRemove_newMapIsNull() {
MapComparator<String, String> mapComparator = getMapComparatorWithNewMapIsNull();
assertThat(mapComparator.getValuesToRemove())
.containsExactly("map1_value1", "map1_value2", "map1_value3", "map1_value4");
}
@Test
void getValuesToRemove_oldMapIsNull() {
MapComparator<String, String> mapComparator = getMapComparatorWithOldMapIsNull();
assertThat(mapComparator.getValuesToRemove()).isEmpty();
}
@Test
void getKeysToUpdate() {
MapComparator<String, String> mapComparator = getMapComparator();
assertThat(mapComparator.getKeysToUpdate()).containsExactly("key1", "key2", "key3");
}
@Test
void getKeysToUpdate_newMapIsNull() {
MapComparator<String, String> mapComparator = getMapComparatorWithNewMapIsNull();
assertThat(mapComparator.getKeysToUpdate()).isEmpty();
}
@Test
void getKeysToUpdate_oldMapIsNull() {
MapComparator<String, String> mapComparator = getMapComparatorWithOldMapIsNull();
assertThat(mapComparator.getKeysToUpdate()).isEmpty();
}
@Test
void getNewValuesToUpdate() {
MapComparator<String, String> mapComparator = getMapComparator();
assertThat(mapComparator.getNewValuesToUpdate())
.containsExactly("map2_value1", "map2_value2", "map2_value3");
}
@Test
void getNewValuesToUpdate_newMapIsEmpty() {
MapComparator<String, String> mapComparator = getMapComparatorWithNewMapIsNull();
assertThat(mapComparator.getNewValuesToUpdate()).isEmpty();
}
@Test
void getNewValuesToUpdate_oldMapIsEmpty() {
MapComparator<String, String> mapComparator = getMapComparatorWithOldMapIsNull();
assertThat(mapComparator.getNewValuesToUpdate()).isEmpty();
}
private MapComparator<String, String> getMapComparator() {
Map<String, String> map1 = new HashMap<>();
map1.put("key1", "map1_value1");
map1.put("key2", "map1_value2");
map1.put("key3", "map1_value3");
map1.put("key4", "map1_value4");
Map<String, String> map2 = new HashMap<>();
map2.put("key1", "map2_value1");
map2.put("key2", "map2_value2");
map2.put("key3", "map2_value3");
map2.put("key5", "map2_value5");
return new MapComparator<>(map1, map2);
}
private MapComparator<String, String> getMapComparatorWithNewMapIsNull() {
Map<String, String> map1 = new HashMap<>();
map1.put("key1", "map1_value1");
map1.put("key2", "map1_value2");
map1.put("key3", "map1_value3");
map1.put("key4", "map1_value4");
return new MapComparator<>(map1, null);
}
private MapComparator<String, String> getMapComparatorWithOldMapIsNull() {
Map<String, String> map2 = new HashMap<>();
map2.put("key1", "map2_value1");
map2.put("key2", "map2_value2");
map2.put("key3", "map2_value3");
map2.put("key5", "map2_value5");
return new MapComparator<>(null, map2);
}
}

7
dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/AlertDao.java

@ -65,6 +65,8 @@ public class AlertDao {
private static final Integer QUERY_ALERT_THRESHOLD = 100;
private static final int ADMIN_ALERT_GROUP_ID = 1;
@Value("${alert.alarm-suppression.crash:60}")
private Integer crashAlarmSuppression;
@ -159,11 +161,10 @@ public class AlertDao {
/**
* MasterServer or WorkerServer stopped
*
* @param alertGroupId alertGroupId
* @param host host
* @param serverType serverType
*/
public void sendServerStoppedAlert(int alertGroupId, String host, String serverType) {
public void sendServerStoppedAlert(String host, String serverType) {
ServerAlertContent serverStopAlertContent = ServerAlertContent.newBuilder().type(serverType)
.host(host)
.event(AlertEvent.SERVER_DOWN)
@ -175,7 +176,7 @@ public class AlertDao {
alert.setWarningType(WarningType.FAILURE);
alert.setAlertStatus(AlertStatus.WAIT_EXECUTION);
alert.setContent(content);
alert.setAlertGroupId(alertGroupId);
alert.setAlertGroupId(ADMIN_ALERT_GROUP_ID);
alert.setCreateTime(new Date());
alert.setUpdateTime(new Date());
alert.setAlertType(AlertType.FAULT_TOLERANCE_WARNING);

9
dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/entity/WorkerGroup.java

@ -19,18 +19,21 @@ package org.apache.dolphinscheduler.dao.entity;
import java.util.Date;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
/**
* worker group
*/
@TableName("t_ds_worker_group")
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class WorkerGroup {
@TableId(value = "id", type = IdType.AUTO)

11
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/enums/ExecutorType.java → dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/repository/WorkerGroupDao.java

@ -15,14 +15,9 @@
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.enums;
package org.apache.dolphinscheduler.dao.repository;
// todo: refactor this enum
public enum ExecutorType {
import org.apache.dolphinscheduler.dao.entity.WorkerGroup;
WORKER,
CLIENT,
MASTER,
;
public interface WorkerGroupDao extends IDao<WorkerGroup> {
}

36
dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/repository/impl/WorkerGroupDaoImpl.java

@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.dao.repository.impl;
import org.apache.dolphinscheduler.dao.entity.WorkerGroup;
import org.apache.dolphinscheduler.dao.mapper.WorkerGroupMapper;
import org.apache.dolphinscheduler.dao.repository.BaseDao;
import org.apache.dolphinscheduler.dao.repository.WorkerGroupDao;
import lombok.NonNull;
import org.springframework.stereotype.Repository;
@Repository
public class WorkerGroupDaoImpl extends BaseDao<WorkerGroup, WorkerGroupMapper> implements WorkerGroupDao {
public WorkerGroupDaoImpl(@NonNull WorkerGroupMapper workerGroupMapper) {
super(workerGroupMapper);
}
}

15
dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/utils/WorkerGroupUtils.java

@ -17,8 +17,15 @@
package org.apache.dolphinscheduler.dao.utils;
import org.apache.dolphinscheduler.dao.entity.WorkerGroup;
import org.apache.commons.lang3.StringUtils;
import java.util.Collections;
import java.util.List;
import com.google.common.collect.Lists;
public class WorkerGroupUtils {
private static final String DEFAULT_WORKER_GROUP = "default";
@ -42,4 +49,12 @@ public class WorkerGroupUtils {
return DEFAULT_WORKER_GROUP;
}
public static List<String> getWorkerAddressListFromWorkerGroup(WorkerGroup workerGroup) {
String addrList = workerGroup.getAddrList();
if (StringUtils.isEmpty(addrList)) {
return Collections.emptyList();
}
return Lists.newArrayList(addrList.split(","));
}
}

5
dolphinscheduler-dao/src/test/java/org/apache/dolphinscheduler/dao/repository/impl/AlertDaoTest.java

@ -56,11 +56,10 @@ class AlertDaoTest extends BaseDaoTest {
@Test
void testSendServerStoppedAlert() {
int alertGroupId = 1;
String host = "127.0.0.998165432";
String serverType = "Master";
alertDao.sendServerStoppedAlert(alertGroupId, host, serverType);
alertDao.sendServerStoppedAlert(alertGroupId, host, serverType);
alertDao.sendServerStoppedAlert(host, serverType);
alertDao.sendServerStoppedAlert(host, serverType);
long count = alertDao.listPendingAlerts(-1)
.stream()
.filter(alert -> alert.getContent().contains(host))

15
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/MasterServer.java

@ -31,9 +31,10 @@ import org.apache.dolphinscheduler.plugin.storage.api.StorageConfiguration;
import org.apache.dolphinscheduler.plugin.task.api.TaskPluginManager;
import org.apache.dolphinscheduler.registry.api.RegistryConfiguration;
import org.apache.dolphinscheduler.scheduler.api.SchedulerApi;
import org.apache.dolphinscheduler.server.master.cluster.ClusterManager;
import org.apache.dolphinscheduler.server.master.cluster.ClusterStateMonitors;
import org.apache.dolphinscheduler.server.master.metrics.MasterServerMetrics;
import org.apache.dolphinscheduler.server.master.registry.MasterRegistryClient;
import org.apache.dolphinscheduler.server.master.registry.MasterSlotManager;
import org.apache.dolphinscheduler.server.master.rpc.MasterRpcServer;
import org.apache.dolphinscheduler.server.master.runner.EventExecuteService;
import org.apache.dolphinscheduler.server.master.runner.FailoverExecuteThread;
@ -86,10 +87,13 @@ public class MasterServer implements IStoppable {
private MetricsProvider metricsProvider;
@Autowired
private MasterSlotManager masterSlotManager;
private TaskGroupCoordinator taskGroupCoordinator;
@Autowired
private TaskGroupCoordinator taskGroupCoordinator;
private ClusterStateMonitors clusterStateMonitors;
@Autowired
private ClusterManager clusterManager;
public static void main(String[] args) {
MasterServerMetrics.registerUncachedException(DefaultUncaughtExceptionHandler::getUncaughtExceptionCount);
@ -111,12 +115,13 @@ public class MasterServer implements IStoppable {
TaskPluginManager.loadTaskPlugin();
DataSourceProcessorProvider.initialize();
this.masterSlotManager.start();
// self tolerant
this.masterRegistryClient.start();
this.masterRegistryClient.setRegistryStoppable(this);
this.clusterManager.start();
this.clusterStateMonitors.start();
this.masterSchedulerBootstrap.start();
this.eventExecuteService.start();

69
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/AbstractClusterSubscribeListener.java

@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster;
import org.apache.dolphinscheduler.registry.api.Event;
import org.apache.dolphinscheduler.registry.api.SubscribeListener;
import lombok.extern.slf4j.Slf4j;
@Slf4j
public abstract class AbstractClusterSubscribeListener<T extends BaseServerMetadata> implements SubscribeListener {
@Override
public void notify(Event event) {
try {
// make sure the event is processed in order
synchronized (this) {
Event.Type type = event.type();
T server = parseServerFromHeartbeat(event.data());
if (server == null) {
log.error("Unknown cluster change event: {}", event);
return;
}
switch (type) {
case ADD:
log.info("Server {} added", server);
onServerAdded(server);
break;
case REMOVE:
log.warn("Server {} removed", server);
onServerRemove(server);
break;
case UPDATE:
log.debug("Server {} updated", server);
onServerUpdate(server);
break;
default:
break;
}
}
} catch (Exception ex) {
log.error("Notify cluster change event: {} failed", event, ex);
}
}
abstract T parseServerFromHeartbeat(String serverHeartBeatJson);
public abstract void onServerAdded(T serverHeartBeat);
public abstract void onServerRemove(T serverHeartBeat);
public abstract void onServerUpdate(T serverHeartBeat);
}

33
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/HostManager.java → dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/BaseServerMetadata.java

@ -15,18 +15,33 @@
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.host;
package org.apache.dolphinscheduler.server.master.cluster;
import org.apache.dolphinscheduler.extract.base.utils.Host;
import org.apache.dolphinscheduler.server.master.dispatch.exceptions.WorkerGroupNotFoundException;
import org.apache.dolphinscheduler.common.enums.ServerStatus;
import java.util.Optional;
import lombok.Data;
import lombok.experimental.SuperBuilder;
/**
* host manager
*/
public interface HostManager {
@Data
@SuperBuilder
public abstract class BaseServerMetadata implements IClusters.IServerMetadata {
private final String address;
private final double cpuUsage;
private final double memoryUsage;
private final ServerStatus serverStatus;
@Override
public String getAddress() {
return address;
}
Optional<Host> select(String workerGroup) throws WorkerGroupNotFoundException;
@Override
public ServerStatus getServerStatus() {
return serverStatus;
}
}

57
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/ClusterManager.java

@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster;
import org.apache.dolphinscheduler.registry.api.RegistryClient;
import org.apache.dolphinscheduler.registry.api.enums.RegistryNodeType;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
@Slf4j
@Component
public class ClusterManager {
@Getter
private final MasterClusters masterClusters;
@Getter
private final WorkerClusters workerClusters;
@Autowired
private WorkerGroupChangeNotifier workerGroupChangeNotifier;
@Autowired
private RegistryClient registryClient;
public ClusterManager() {
this.masterClusters = new MasterClusters();
this.workerClusters = new WorkerClusters();
}
public void start() {
this.registryClient.subscribe(RegistryNodeType.MASTER.getRegistryPath(), masterClusters);
this.registryClient.subscribe(RegistryNodeType.WORKER.getRegistryPath(), workerClusters);
this.workerGroupChangeNotifier.subscribeWorkerGroupsChange(workerClusters);
log.info("ClusterManager started...");
}
}

60
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/ClusterStateMonitors.java

@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster;
import org.apache.dolphinscheduler.dao.AlertDao;
import org.apache.dolphinscheduler.service.alert.ListenerEventAlertManager;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
@Slf4j
@Component
public class ClusterStateMonitors {
@Autowired
private ClusterManager clusterManager;
@Autowired
private ListenerEventAlertManager listenerEventAlertManager;
@Autowired
private AlertDao alertDao;
public void start() {
this.clusterManager.getMasterClusters()
.registerListener((IClusters.ServerRemovedListener<MasterServerMetadata>) this::masterRemoved);
this.clusterManager.getWorkerClusters()
.registerListener((IClusters.ServerRemovedListener<WorkerServerMetadata>) this::workerRemoved);
log.info("ClusterStateMonitors started...");
}
void masterRemoved(MasterServerMetadata masterServer) {
// todo: unify the alert message
alertDao.sendServerStoppedAlert(masterServer.getAddress(), "MASTER");
listenerEventAlertManager.publishServerDownListenerEvent(masterServer.getAddress(), "MASTER");
}
void workerRemoved(WorkerServerMetadata workerServer) {
alertDao.sendServerStoppedAlert(workerServer.getAddress(), "WORKER");
listenerEventAlertManager.publishServerDownListenerEvent(workerServer.getAddress(), "WORKER");
}
}

79
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/IClusters.java

@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster;
import org.apache.dolphinscheduler.common.enums.ServerStatus;
import java.util.List;
public interface IClusters<S extends IClusters.IServerMetadata> {
List<S> getServers();
void registerListener(IClustersChangeListener<S> listener);
interface IServerMetadata {
String getAddress();
ServerStatus getServerStatus();
}
interface IClustersChangeListener<S extends IServerMetadata> {
void onServerAdded(S server);
void onServerRemove(S server);
void onServerUpdate(S server);
}
interface ServerAddedListener<S extends IServerMetadata> extends IClustersChangeListener<S> {
@Override
default void onServerRemove(S server) {
// only care about server added
}
@Override
default void onServerUpdate(S server) {
// only care about server added
}
}
interface ServerRemovedListener<S extends IServerMetadata> extends IClustersChangeListener<S> {
@Override
default void onServerAdded(S server) {
// only care about server removed
}
@Override
void onServerRemove(S server);
@Override
default void onServerUpdate(S server) {
// only care about server added
}
}
}

25
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/IMasterSlotReBalancer.java

@ -0,0 +1,25 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster;
import java.util.List;
public interface IMasterSlotReBalancer {
void doReBalance(List<MasterServerMetadata> masterServerList);
}

99
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/MasterClusters.java

@ -0,0 +1,99 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster;
import org.apache.dolphinscheduler.common.enums.ServerStatus;
import org.apache.dolphinscheduler.common.model.MasterHeartBeat;
import org.apache.dolphinscheduler.common.utils.JSONUtils;
import org.apache.commons.collections4.list.UnmodifiableList;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
@Slf4j
public class MasterClusters extends AbstractClusterSubscribeListener<MasterServerMetadata>
implements
IClusters<MasterServerMetadata> {
/**
* Master address -> MasterServer
**/
private final Map<String, MasterServerMetadata> masterServerMap = new ConcurrentHashMap<>();
private final List<IClustersChangeListener<MasterServerMetadata>> masterClusterChangeListeners =
new CopyOnWriteArrayList<>();
@Override
public List<MasterServerMetadata> getServers() {
return UnmodifiableList.unmodifiableList(new ArrayList<>(masterServerMap.values()));
}
public List<MasterServerMetadata> getNormalServers() {
List<MasterServerMetadata> normalMasterServers = masterServerMap.values()
.stream()
.filter(masterServer -> masterServer.getServerStatus() == ServerStatus.NORMAL)
.collect(Collectors.toList());
return UnmodifiableList.unmodifiableList(normalMasterServers);
}
@Override
public void registerListener(IClustersChangeListener<MasterServerMetadata> listener) {
masterClusterChangeListeners.add(listener);
}
@Override
MasterServerMetadata parseServerFromHeartbeat(String masterHeartBeatJson) {
MasterHeartBeat masterHeartBeat = JSONUtils.parseObject(masterHeartBeatJson, MasterHeartBeat.class);
if (masterHeartBeat == null) {
return null;
}
return MasterServerMetadata.parseFromHeartBeat(masterHeartBeat);
}
@Override
public void onServerAdded(MasterServerMetadata masterServer) {
masterServerMap.put(masterServer.getAddress(), masterServer);
for (IClustersChangeListener<MasterServerMetadata> listener : masterClusterChangeListeners) {
listener.onServerAdded(masterServer);
}
}
@Override
public void onServerRemove(MasterServerMetadata masterServer) {
masterServerMap.remove(masterServer.getAddress());
for (IClustersChangeListener<MasterServerMetadata> listener : masterClusterChangeListeners) {
listener.onServerRemove(masterServer);
}
}
@Override
public void onServerUpdate(MasterServerMetadata masterServer) {
masterServerMap.put(masterServer.getAddress(), masterServer);
for (IClustersChangeListener<MasterServerMetadata> listener : masterClusterChangeListeners) {
listener.onServerUpdate(masterServer);
}
}
}

46
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/MasterServerMetadata.java

@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster;
import org.apache.dolphinscheduler.common.model.MasterHeartBeat;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.SuperBuilder;
@Data
@SuperBuilder
@EqualsAndHashCode(callSuper = true)
public class MasterServerMetadata extends BaseServerMetadata implements Comparable<MasterServerMetadata> {
public static MasterServerMetadata parseFromHeartBeat(MasterHeartBeat masterHeartBeat) {
return MasterServerMetadata.builder()
.address(masterHeartBeat.getHost() + ":" + masterHeartBeat.getPort())
.cpuUsage(masterHeartBeat.getCpuUsage())
.memoryUsage(masterHeartBeat.getMemoryUsage())
.serverStatus(masterHeartBeat.getServerStatus())
.build();
}
// Use the master address to sort the master server
@Override
public int compareTo(MasterServerMetadata o) {
return this.getAddress().compareTo(o.getAddress());
}
}

101
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/MasterSlotManager.java

@ -0,0 +1,101 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster;
import org.apache.dolphinscheduler.server.master.config.MasterConfig;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
@Slf4j
@Component
public class MasterSlotManager implements IMasterSlotReBalancer {
private final MasterClusters masterClusters;
private final MasterConfig masterConfig;
private volatile int currentSlot = -1;
private volatile int totalSlots = 0;
public MasterSlotManager(ClusterManager clusterManager, MasterConfig masterConfig) {
this.masterConfig = masterConfig;
this.masterClusters = clusterManager.getMasterClusters();
this.masterClusters.registerListener(new IClusters.IClustersChangeListener<MasterServerMetadata>() {
@Override
public void onServerAdded(MasterServerMetadata server) {
doReBalance(masterClusters.getNormalServers());
}
@Override
public void onServerRemove(MasterServerMetadata server) {
doReBalance(masterClusters.getNormalServers());
}
@Override
public void onServerUpdate(MasterServerMetadata server) {
doReBalance(masterClusters.getNormalServers());
}
});
}
/**
* Get the current master slot, if the slot is -1, it means the master slot is not available.
*/
public int getCurrentMasterSlot() {
return currentSlot;
}
/**
* Get the total master slots.
*/
public int getTotalMasterSlots() {
return totalSlots;
}
public boolean checkSlotValid() {
return totalSlots > 0 && currentSlot >= 0;
}
@Override
public void doReBalance(List<MasterServerMetadata> normalMasterServers) {
int tmpCurrentSlot = -1;
for (int i = 0; i < normalMasterServers.size(); i++) {
if (normalMasterServers.get(i).getAddress().equals(masterConfig.getMasterAddress())) {
tmpCurrentSlot = i;
break;
}
}
if (tmpCurrentSlot == -1) {
log.warn(
"Do re balance failed, cannot found the current master: {} in the normal master clusters: {}. Please check the current master server status",
masterConfig.getMasterAddress(), normalMasterServers);
currentSlot = -1;
return;
}
totalSlots = normalMasterServers.size();
currentSlot = tmpCurrentSlot;
}
}

143
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/WorkerClusters.java

@ -0,0 +1,143 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster;
import org.apache.dolphinscheduler.common.enums.ServerStatus;
import org.apache.dolphinscheduler.common.model.WorkerHeartBeat;
import org.apache.dolphinscheduler.common.utils.JSONUtils;
import org.apache.dolphinscheduler.dao.entity.WorkerGroup;
import org.apache.dolphinscheduler.dao.utils.WorkerGroupUtils;
import org.apache.commons.collections4.list.UnmodifiableList;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.stream.Collectors;
public class WorkerClusters extends AbstractClusterSubscribeListener<WorkerServerMetadata>
implements
IClusters<WorkerServerMetadata>,
WorkerGroupChangeNotifier.WorkerGroupListener {
// WorkerIdentifier(workerAddress) -> worker
private final Map<String, WorkerServerMetadata> workerMapping = new ConcurrentHashMap<>();
// WorkerGroup -> WorkerIdentifier(workerAddress)
private final Map<String, List<String>> workerGroupMapping = new ConcurrentHashMap<>();
private final List<IClustersChangeListener<WorkerServerMetadata>> workerClusterChangeListeners =
new CopyOnWriteArrayList<>();
@Override
public List<WorkerServerMetadata> getServers() {
return UnmodifiableList.unmodifiableList(new ArrayList<>(workerMapping.values()));
}
public List<String> getWorkerServerAddressByGroup(String workerGroup) {
if (WorkerGroupUtils.getDefaultWorkerGroup().equals(workerGroup)) {
return UnmodifiableList.unmodifiableList(new ArrayList<>(workerMapping.keySet()));
}
return workerGroupMapping.getOrDefault(workerGroup, Collections.emptyList());
}
public List<String> getNormalWorkerServerAddressByGroup(String workerGroup) {
List<String> normalWorkerAddresses = getWorkerServerAddressByGroup(workerGroup)
.stream()
.map(workerMapping::get)
.filter(Objects::nonNull)
.filter(workerServer -> workerServer.getServerStatus() == ServerStatus.NORMAL)
.map(WorkerServerMetadata::getAddress)
.collect(Collectors.toList());
return UnmodifiableList.unmodifiableList(normalWorkerAddresses);
}
public boolean containsWorkerGroup(String workerGroup) {
return WorkerGroupUtils.getDefaultWorkerGroup().equals(workerGroup)
|| workerGroupMapping.containsKey(workerGroup);
}
@Override
public void registerListener(IClustersChangeListener<WorkerServerMetadata> listener) {
workerClusterChangeListeners.add(listener);
}
@Override
public void onWorkerGroupDelete(List<WorkerGroup> workerGroups) {
for (WorkerGroup workerGroup : workerGroups) {
workerGroupMapping.remove(workerGroup.getName());
}
}
@Override
public void onWorkerGroupAdd(List<WorkerGroup> workerGroups) {
// The logic of adding WorkerGroup is the same as updating WorkerGroup
// Both need to change the WorkerGroup mapping to the latest
onWorkerGroupChange(workerGroups);
}
@Override
public void onWorkerGroupChange(List<WorkerGroup> workerGroups) {
for (WorkerGroup workerGroup : workerGroups) {
List<String> activeWorkers = WorkerGroupUtils.getWorkerAddressListFromWorkerGroup(workerGroup)
.stream()
.map(workerMapping::get)
.filter(Objects::nonNull)
.map(WorkerServerMetadata::getAddress)
.collect(Collectors.toList());
workerGroupMapping.put(workerGroup.getName(), activeWorkers);
}
}
@Override
WorkerServerMetadata parseServerFromHeartbeat(String serverHeartBeatJson) {
WorkerHeartBeat workerHeartBeat = JSONUtils.parseObject(serverHeartBeatJson, WorkerHeartBeat.class);
if (workerHeartBeat == null) {
return null;
}
return WorkerServerMetadata.parseFromHeartBeat(workerHeartBeat);
}
@Override
public void onServerAdded(WorkerServerMetadata workerServer) {
workerMapping.put(workerServer.getAddress(), workerServer);
for (IClustersChangeListener<WorkerServerMetadata> listener : workerClusterChangeListeners) {
listener.onServerAdded(workerServer);
}
}
@Override
public void onServerRemove(WorkerServerMetadata workerServer) {
workerMapping.remove(workerServer.getAddress(), workerServer);
for (IClustersChangeListener<WorkerServerMetadata> listener : workerClusterChangeListeners) {
listener.onServerRemove(workerServer);
}
}
@Override
public void onServerUpdate(WorkerServerMetadata workerServer) {
workerMapping.put(workerServer.getAddress(), workerServer);
for (IClustersChangeListener<WorkerServerMetadata> listener : workerClusterChangeListeners) {
listener.onServerUpdate(workerServer);
}
}
}

116
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/WorkerGroupChangeNotifier.java

@ -0,0 +1,116 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster;
import org.apache.dolphinscheduler.common.utils.MapComparator;
import org.apache.dolphinscheduler.dao.entity.WorkerGroup;
import org.apache.dolphinscheduler.dao.repository.WorkerGroupDao;
import org.apache.dolphinscheduler.server.master.utils.MasterThreadFactory;
import org.apache.commons.collections4.CollectionUtils;
import java.time.Duration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
/**
* Use to watch the worker group from database and notify the change.
*/
@Slf4j
@Component
public class WorkerGroupChangeNotifier {
private static final long DEFAULT_REFRESH_WORKER_INTERVAL = Duration.ofMinutes(1).toMillis();
private final WorkerGroupDao workerGroupDao;
private final List<WorkerGroupListener> listeners = new CopyOnWriteArrayList<>();
private Map<String, WorkerGroup> workerGroupMap = new HashMap<>();
public WorkerGroupChangeNotifier(WorkerGroupDao workerGroupDao) {
this.workerGroupDao = workerGroupDao;
detectWorkerGroupChanges();
MasterThreadFactory.getDefaultSchedulerThreadExecutor().scheduleWithFixedDelay(
this::detectWorkerGroupChanges,
DEFAULT_REFRESH_WORKER_INTERVAL,
DEFAULT_REFRESH_WORKER_INTERVAL,
TimeUnit.SECONDS);
}
public void subscribeWorkerGroupsChange(WorkerGroupListener listener) {
listeners.add(listener);
}
void detectWorkerGroupChanges() {
try {
MapComparator<String, WorkerGroup> mapComparator = detectChangedWorkerGroups();
triggerListeners(mapComparator);
workerGroupMap = mapComparator.getNewMap();
} catch (Exception ex) {
log.error("Detect WorkerGroup changes failed", ex);
}
}
Map<String, WorkerGroup> getWorkerGroupMap() {
return workerGroupMap;
}
private MapComparator<String, WorkerGroup> detectChangedWorkerGroups() {
final Map<String, WorkerGroup> tmpWorkerGroupMap = workerGroupDao.queryAll()
.stream()
.collect(Collectors.toMap(WorkerGroup::getName, workerGroup -> workerGroup));
return new MapComparator<>(workerGroupMap, tmpWorkerGroupMap);
}
private void triggerListeners(MapComparator<String, WorkerGroup> mapComparator) {
if (CollectionUtils.isEmpty(listeners)) {
return;
}
final List<WorkerGroup> workerGroupsAdded = mapComparator.getValuesToAdd();
if (CollectionUtils.isNotEmpty(workerGroupsAdded)) {
listeners.forEach(listener -> listener.onWorkerGroupAdd(workerGroupsAdded));
}
final List<WorkerGroup> workerGroupsRemoved = mapComparator.getValuesToRemove();
if (CollectionUtils.isNotEmpty(workerGroupsRemoved)) {
listeners.forEach(listener -> listener.onWorkerGroupDelete(workerGroupsRemoved));
}
final List<WorkerGroup> workerGroupsUpdated = mapComparator.getNewValuesToUpdate();
if (CollectionUtils.isNotEmpty(workerGroupsUpdated)) {
listeners.forEach(listener -> listener.onWorkerGroupChange(workerGroupsUpdated));
}
}
public interface WorkerGroupListener {
void onWorkerGroupDelete(List<WorkerGroup> workerGroups);
void onWorkerGroupAdd(List<WorkerGroup> workerGroups);
void onWorkerGroupChange(List<WorkerGroup> workerGroups);
}
}

52
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/WorkerServerMetadata.java

@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster;
import org.apache.dolphinscheduler.common.model.WorkerHeartBeat;
import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.SuperBuilder;
@Data
@SuperBuilder
@EqualsAndHashCode(callSuper = true)
public class WorkerServerMetadata extends BaseServerMetadata {
@Builder.Default
private final String workerGroup = "default";
// Only used in FixedWeightedRoundRobinWorkerLoadBalancer
@Builder.Default
private final double workerWeight = 1;
private final double taskThreadPoolUsage;
public static WorkerServerMetadata parseFromHeartBeat(WorkerHeartBeat workerHeartBeat) {
return WorkerServerMetadata.builder()
.address(workerHeartBeat.getHost() + ":" + workerHeartBeat.getPort())
.cpuUsage(workerHeartBeat.getCpuUsage())
.memoryUsage(workerHeartBeat.getMemoryUsage())
.serverStatus(workerHeartBeat.getServerStatus())
.workerWeight(workerHeartBeat.getWorkerHostWeight())
.taskThreadPoolUsage(workerHeartBeat.getThreadPoolUsage())
.build();
}
}

115
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/DynamicWeightedRoundRobinWorkerLoadBalancer.java

@ -0,0 +1,115 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster.loadbalancer;
import org.apache.dolphinscheduler.server.master.cluster.IClusters;
import org.apache.dolphinscheduler.server.master.cluster.WorkerClusters;
import org.apache.dolphinscheduler.server.master.cluster.WorkerServerMetadata;
import org.apache.commons.collections4.CollectionUtils;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import org.jetbrains.annotations.NotNull;
/**
* This load balancer is used to select a worker from {@link WorkerClusters} by dynamic weights.
* </p>
* The dynamic weights are calculated by the worker's load. e.g. cpu/memory/disk usage/thread usage etc.
* You can config the weight calculation strategy in {@link WorkerLoadBalancerConfigurationProperties.DynamicWeightConfigProperties}.
*/
public class DynamicWeightedRoundRobinWorkerLoadBalancer implements IWorkerLoadBalancer {
private final WorkerClusters workerClusters;
private final AtomicInteger robinIndex = new AtomicInteger(0);
private Map<String, WeightedServer<WorkerServerMetadata>> weightedServerMap = new ConcurrentHashMap<>();
public DynamicWeightedRoundRobinWorkerLoadBalancer(WorkerClusters workerClusters,
WorkerLoadBalancerConfigurationProperties.DynamicWeightConfigProperties dynamicWeightConfigProperties) {
this.workerClusters = workerClusters;
this.workerClusters.registerListener(new IClusters.IClustersChangeListener<WorkerServerMetadata>() {
@Override
public void onServerAdded(WorkerServerMetadata server) {
weightedServerMap.put(server.getAddress(), new WeightedServer<>(server, calculateWeight(server)));
}
@Override
public void onServerRemove(WorkerServerMetadata server) {
weightedServerMap.remove(server.getAddress());
}
@Override
public void onServerUpdate(WorkerServerMetadata server) {
weightedServerMap.put(server.getAddress(), new WeightedServer<>(server, calculateWeight(server)));
}
private double calculateWeight(WorkerServerMetadata server) {
return 100 - (dynamicWeightConfigProperties.getCpuUsageWeight() * server.getCpuUsage()
+ dynamicWeightConfigProperties.getMemoryUsageWeight() * server.getMemoryUsage()
+ dynamicWeightConfigProperties.getTaskThreadPoolUsageWeight()
* server.getTaskThreadPoolUsage())
/ 3;
}
});
}
@Override
public Optional<String> select(@NotNull String workerGroup) {
List<WeightedServer<WorkerServerMetadata>> weightedServers =
workerClusters.getNormalWorkerServerAddressByGroup(workerGroup)
.stream()
.map(weightedServerMap::get)
.filter(Objects::nonNull) // filter non null here to avoid the two map changed between
// workerClusters
// and weightedServerMap is not atomic
.collect(Collectors.toList());
if (CollectionUtils.isEmpty(weightedServers)) {
return Optional.empty();
}
double totalWeight = weightedServers.stream().mapToDouble(WeightedServer::getWeight).sum();
WeightedServer<WorkerServerMetadata> selectedWorker = null;
while (selectedWorker == null) {
WeightedServer<WorkerServerMetadata> tmpWorker =
weightedServers.get((robinIndex.incrementAndGet()) % weightedServers.size());
tmpWorker.setCurrentWeight(tmpWorker.getCurrentWeight() + tmpWorker.getWeight());
if (tmpWorker.getCurrentWeight() >= totalWeight) {
tmpWorker.setCurrentWeight(tmpWorker.getCurrentWeight() - totalWeight);
selectedWorker = tmpWorker;
}
}
return Optional.of(selectedWorker.getServer().getAddress());
}
@Override
public WorkerLoadBalancerType getType() {
return WorkerLoadBalancerType.DYNAMIC_WEIGHTED_ROUND_ROBIN;
}
}

109
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/FixedWeightedRoundRobinWorkerLoadBalancer.java

@ -0,0 +1,109 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster.loadbalancer;
import org.apache.dolphinscheduler.server.master.cluster.IClusters;
import org.apache.dolphinscheduler.server.master.cluster.WorkerClusters;
import org.apache.dolphinscheduler.server.master.cluster.WorkerServerMetadata;
import org.apache.commons.collections4.CollectionUtils;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import org.jetbrains.annotations.NotNull;
/**
* This load balancer uses a fixed weighted round robin algorithm to select a worker from {@link WorkerClusters}.
* <p>
* e.g. if there are 3 worker servers with weight 1, 2, 3, the selection will be like:
* 1, 2, 3, 1, 2, 3, 3, 1, 2, 3, 3, 1, 2, 3, ...
* <p>
* The weight of each worker server is determined by the worker server itself.
*/
public class FixedWeightedRoundRobinWorkerLoadBalancer implements IWorkerLoadBalancer {
private final WorkerClusters workerClusters;
private final AtomicInteger robinIndex = new AtomicInteger(0);
private final Map<String, WeightedServer<WorkerServerMetadata>> weightedServerMap = new ConcurrentHashMap<>();
public FixedWeightedRoundRobinWorkerLoadBalancer(WorkerClusters workerClusters) {
this.workerClusters = workerClusters;
this.workerClusters.registerListener(new IClusters.IClustersChangeListener<WorkerServerMetadata>() {
@Override
public void onServerAdded(WorkerServerMetadata server) {
weightedServerMap.put(server.getAddress(), new WeightedServer<>(server, server.getWorkerWeight()));
}
@Override
public void onServerRemove(WorkerServerMetadata server) {
weightedServerMap.remove(server.getAddress(), new WeightedServer<>(server, server.getWorkerWeight()));
}
@Override
public void onServerUpdate(WorkerServerMetadata server) {
// don't care the update event, since this will not affect the weight
}
});
}
@Override
public Optional<String> select(@NotNull String workerGroup) {
List<WeightedServer<WorkerServerMetadata>> weightedServers =
workerClusters.getNormalWorkerServerAddressByGroup(workerGroup)
.stream()
.map(weightedServerMap::get)
// filter non null here to avoid the two map changed between
// workerClusters and weightedServerMap is not atomic
.filter(Objects::nonNull)
.collect(Collectors.toList());
if (CollectionUtils.isEmpty(weightedServers)) {
return Optional.empty();
}
double totalWeight = weightedServers.stream().mapToDouble(WeightedServer::getWeight).sum();
WeightedServer<WorkerServerMetadata> selectedWorker = null;
while (selectedWorker == null) {
WeightedServer<WorkerServerMetadata> tmpWorker =
weightedServers.get((robinIndex.incrementAndGet()) % weightedServers.size());
tmpWorker.setCurrentWeight(tmpWorker.getCurrentWeight() + tmpWorker.getWeight());
if (tmpWorker.getCurrentWeight() >= totalWeight) {
tmpWorker.setCurrentWeight(tmpWorker.getCurrentWeight() - totalWeight);
selectedWorker = tmpWorker;
}
}
return Optional.of(selectedWorker.getServer().getAddress());
}
@Override
public WorkerLoadBalancerType getType() {
return WorkerLoadBalancerType.FIXED_WEIGHTED_ROUND_ROBIN;
}
}

22
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterInfoChangeListener.java → dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/IWorkerLoadBalancer.java

@ -15,21 +15,27 @@
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.registry;
package org.apache.dolphinscheduler.server.master.cluster.loadbalancer;
import org.apache.dolphinscheduler.common.model.MasterHeartBeat;
import org.apache.dolphinscheduler.server.master.cluster.WorkerClusters;
import java.util.Map;
import java.util.Optional;
import lombok.NonNull;
/**
* The listener used in {@link ServerNodeManager} to notify the change of master info.
* The worker load balancer used to select a worker from the {@link WorkerClusters} by load balancer algorithm.
*/
public interface MasterInfoChangeListener {
public interface IWorkerLoadBalancer {
/**
* Used to notify the change of master info.
* Select a worker address under the given worker group.
*
* @param masterNodeInfo master node info map, key is master address, value is master info.
* @param workerGroup worker group cannot be null.
* @return the selected worker address, or empty if no worker is available.
*/
void notify(Map<String, MasterHeartBeat> masterNodeInfo);
Optional<String> select(@NonNull String workerGroup);
WorkerLoadBalancerType getType();
}

55
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/RandomWorkerLoadBalancer.java

@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster.loadbalancer;
import org.apache.dolphinscheduler.server.master.cluster.WorkerClusters;
import org.apache.commons.collections4.CollectionUtils;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.ThreadLocalRandom;
import org.jetbrains.annotations.NotNull;
/**
* The worker load balancer used to select a worker from the {@link WorkerClusters} by random algorithm.
*/
public class RandomWorkerLoadBalancer implements IWorkerLoadBalancer {
private final WorkerClusters workerClusters;
public RandomWorkerLoadBalancer(WorkerClusters workerClusters) {
this.workerClusters = workerClusters;
}
@Override
public Optional<String> select(@NotNull String workerGroup) {
List<String> workerServerAddresses = workerClusters.getNormalWorkerServerAddressByGroup(workerGroup);
if (CollectionUtils.isEmpty(workerServerAddresses)) {
return Optional.empty();
}
int index = ThreadLocalRandom.current().nextInt(workerServerAddresses.size());
return Optional.of(workerServerAddresses.get(index));
}
@Override
public WorkerLoadBalancerType getType() {
return WorkerLoadBalancerType.RANDOM;
}
}

56
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/RoundRobinWorkerLoadBalancer.java

@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster.loadbalancer;
import org.apache.dolphinscheduler.server.master.cluster.WorkerClusters;
import org.apache.commons.collections4.CollectionUtils;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicInteger;
import org.jetbrains.annotations.NotNull;
/**
* The worker load balancer used to select a worker from the {@link WorkerClusters} by round-robin algorithm.
*/
public class RoundRobinWorkerLoadBalancer implements IWorkerLoadBalancer {
private final WorkerClusters workerClusters;
private final AtomicInteger robinIndex = new AtomicInteger(0);
public RoundRobinWorkerLoadBalancer(WorkerClusters workerClusters) {
this.workerClusters = workerClusters;
}
@Override
public Optional<String> select(@NotNull String workerGroup) {
List<String> workerServerAddresses = workerClusters.getNormalWorkerServerAddressByGroup(workerGroup);
if (CollectionUtils.isEmpty(workerServerAddresses)) {
return Optional.empty();
}
return Optional.of(workerServerAddresses.get(robinIndex.getAndIncrement() % workerServerAddresses.size()));
}
@Override
public WorkerLoadBalancerType getType() {
return WorkerLoadBalancerType.ROUND_ROBIN;
}
}

45
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/WeightedServer.java

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster.loadbalancer;
import org.apache.dolphinscheduler.server.master.cluster.IClusters;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class WeightedServer<T extends IClusters.IServerMetadata> {
private T server;
private double weight;
private double currentWeight;
public WeightedServer(T server, double weight) {
this.server = server;
this.weight = weight;
this.currentWeight = 0; // Initialize currentWeight is 0
}
}

50
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/WorkerLoadBalancerConfiguration.java

@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster.loadbalancer;
import org.apache.dolphinscheduler.server.master.cluster.ClusterManager;
import org.apache.dolphinscheduler.server.master.config.MasterConfig;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Configuration
public class WorkerLoadBalancerConfiguration {
@Bean
public IWorkerLoadBalancer randomWorkerLoadBalancer(MasterConfig masterConfig, ClusterManager clusterManager) {
WorkerLoadBalancerConfigurationProperties workerLoadBalancerConfigurationProperties =
masterConfig.getWorkerLoadBalancerConfigurationProperties();
switch (workerLoadBalancerConfigurationProperties.getType()) {
case RANDOM:
return new RandomWorkerLoadBalancer(clusterManager.getWorkerClusters());
case ROUND_ROBIN:
return new RoundRobinWorkerLoadBalancer(clusterManager.getWorkerClusters());
case FIXED_WEIGHTED_ROUND_ROBIN:
return new FixedWeightedRoundRobinWorkerLoadBalancer(clusterManager.getWorkerClusters());
case DYNAMIC_WEIGHTED_ROUND_ROBIN:
return new DynamicWeightedRoundRobinWorkerLoadBalancer(
clusterManager.getWorkerClusters(),
workerLoadBalancerConfigurationProperties.getDynamicWeightConfigProperties());
default:
throw new IllegalArgumentException(
"unSupport worker load balancer type " + workerLoadBalancerConfigurationProperties.getType());
}
}
}

61
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/WorkerLoadBalancerConfigurationProperties.java

@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster.loadbalancer;
import lombok.Data;
import org.springframework.validation.Errors;
@Data
public class WorkerLoadBalancerConfigurationProperties {
private WorkerLoadBalancerType type = WorkerLoadBalancerType.ROUND_ROBIN;
private DynamicWeightConfigProperties dynamicWeightConfigProperties = new DynamicWeightConfigProperties();
public void validate(Errors errors) {
dynamicWeightConfigProperties.validated(errors);
}
@Data
public static class DynamicWeightConfigProperties {
private int cpuUsageWeight = 30;
private int memoryUsageWeight = 30;
private int taskThreadPoolUsageWeight = 40;
public void validated(Errors errors) {
if (cpuUsageWeight < 0) {
errors.rejectValue("cpuUsageWeight", "cpuUsageWeight", "cpuUsageWeight must >= 0");
}
if (memoryUsageWeight < 0) {
errors.rejectValue("memoryUsageWeight", "memoryUsageWeight", "memoryUsageWeight must >= 0");
}
if (taskThreadPoolUsageWeight < 0) {
errors.rejectValue("threadUsageWeight", "threadUsageWeight", "threadUsageWeight must >= 0");
}
if (cpuUsageWeight + memoryUsageWeight + taskThreadPoolUsageWeight != 100) {
errors.rejectValue("cpuUsageWeight", "cpuUsageWeight",
"cpuUsageWeight + memoryUsageWeight + threadUsageWeight must be 100");
}
}
}
}

9
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/HostSelector.java → dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/WorkerLoadBalancerType.java

@ -15,12 +15,11 @@
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.host.assign;
package org.apache.dolphinscheduler.server.master.cluster.loadbalancer;
public enum HostSelector {
public enum WorkerLoadBalancerType {
RANDOM,
ROUND_ROBIN,
LOWER_WEIGHT
FIXED_WEIGHTED_ROUND_ROBIN,
DYNAMIC_WEIGHTED_ROUND_ROBIN,
}

2
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/command/CommandFetcherConfiguration.java

@ -20,9 +20,9 @@ package org.apache.dolphinscheduler.server.master.command;
import static com.google.common.base.Preconditions.checkNotNull;
import org.apache.dolphinscheduler.dao.repository.CommandDao;
import org.apache.dolphinscheduler.server.master.cluster.MasterSlotManager;
import org.apache.dolphinscheduler.server.master.config.CommandFetchStrategy;
import org.apache.dolphinscheduler.server.master.config.MasterConfig;
import org.apache.dolphinscheduler.server.master.registry.MasterSlotManager;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

12
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/command/IdSlotBasedCommandFetcher.java

@ -19,9 +19,9 @@ package org.apache.dolphinscheduler.server.master.command;
import org.apache.dolphinscheduler.dao.entity.Command;
import org.apache.dolphinscheduler.dao.repository.CommandDao;
import org.apache.dolphinscheduler.server.master.cluster.MasterSlotManager;
import org.apache.dolphinscheduler.server.master.config.CommandFetchStrategy;
import org.apache.dolphinscheduler.server.master.metrics.ProcessInstanceMetrics;
import org.apache.dolphinscheduler.server.master.registry.MasterSlotManager;
import java.util.Collections;
import java.util.List;
@ -51,13 +51,13 @@ public class IdSlotBasedCommandFetcher implements ICommandFetcher {
@Override
public List<Command> fetchCommands() {
long scheduleStartTime = System.currentTimeMillis();
int currentSlotIndex = masterSlotManager.getSlot();
int totalSlot = masterSlotManager.getMasterSize();
if (totalSlot <= 0 || currentSlotIndex < 0) {
log.warn("Slot is validated, current master slots: {}, the current slot index is {}", totalSlot,
currentSlotIndex);
if (!masterSlotManager.checkSlotValid()) {
log.warn("MasterSlotManager check slot ({} -> {})is invalidated.",
masterSlotManager.getCurrentMasterSlot(), masterSlotManager.getTotalMasterSlots());
return Collections.emptyList();
}
int currentSlotIndex = masterSlotManager.getCurrentMasterSlot();
int totalSlot = masterSlotManager.getTotalMasterSlots();
List<Command> commands = commandDao.queryCommandByIdSlot(
currentSlotIndex,
totalSlot,

21
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterConfig.java

@ -20,7 +20,7 @@ package org.apache.dolphinscheduler.server.master.config;
import org.apache.dolphinscheduler.common.utils.NetUtils;
import org.apache.dolphinscheduler.registry.api.ConnectStrategyProperties;
import org.apache.dolphinscheduler.registry.api.enums.RegistryNodeType;
import org.apache.dolphinscheduler.server.master.dispatch.host.assign.HostSelector;
import org.apache.dolphinscheduler.server.master.cluster.loadbalancer.WorkerLoadBalancerConfigurationProperties;
import org.apache.dolphinscheduler.server.master.processor.queue.TaskExecuteRunnable;
import org.apache.dolphinscheduler.server.master.runner.WorkflowExecuteRunnable;
@ -63,14 +63,6 @@ public class MasterConfig implements Validator {
private int masterSyncTaskExecutorThreadPoolSize = Runtime.getRuntime().availableProcessors();
private int masterAsyncTaskExecutorThreadPoolSize = Runtime.getRuntime().availableProcessors();
/**
* The task dispatch thread pool size.
*/
private int dispatchTaskNumber = 3;
/**
* Worker select strategy.
*/
private HostSelector hostSelector = HostSelector.LOWER_WEIGHT;
/**
* Master heart beat task execute interval.
*/
@ -96,6 +88,9 @@ public class MasterConfig implements Validator {
private CommandFetchStrategy commandFetchStrategy = new CommandFetchStrategy();
private WorkerLoadBalancerConfigurationProperties workerLoadBalancerConfigurationProperties =
new WorkerLoadBalancerConfigurationProperties();
/**
* The IP address and listening port of the master server in the format 'ip:listenPort'.
*/
@ -123,9 +118,6 @@ public class MasterConfig implements Validator {
if (masterConfig.getExecThreads() <= 0) {
errors.rejectValue("exec-threads", null, "should be a positive value");
}
if (masterConfig.getDispatchTaskNumber() <= 0) {
errors.rejectValue("dispatch-task-number", null, "should be a positive value");
}
if (masterConfig.getMaxHeartbeatInterval().toMillis() < 0) {
errors.rejectValue("max-heartbeat-interval", null, "should be a valid duration");
}
@ -149,6 +141,7 @@ public class MasterConfig implements Validator {
masterConfig.setMasterAddress(NetUtils.getAddr(masterConfig.getListenPort()));
}
commandFetchStrategy.validate(errors);
workerLoadBalancerConfigurationProperties.validate(errors);
masterConfig.setMasterRegistryPath(
RegistryNodeType.MASTER.getRegistryPath() + "/" + masterConfig.getMasterAddress());
@ -161,8 +154,6 @@ public class MasterConfig implements Validator {
"\n listen-port -> " + listenPort +
"\n pre-exec-threads -> " + preExecThreads +
"\n exec-threads -> " + execThreads +
"\n dispatch-task-number -> " + dispatchTaskNumber +
"\n host-selector -> " + hostSelector +
"\n max-heartbeat-interval -> " + maxHeartbeatInterval +
"\n task-commit-retry-times -> " + taskCommitRetryTimes +
"\n task-commit-interval -> " + taskCommitInterval +
@ -175,6 +166,8 @@ public class MasterConfig implements Validator {
"\n master-registry-path: " + masterRegistryPath +
"\n worker-group-refresh-interval: " + workerGroupRefreshInterval +
"\n command-fetch-strategy: " + commandFetchStrategy +
"\n worker-load-balancer-configuration-properties: "
+ workerLoadBalancerConfigurationProperties +
"\n****************************Master Configuration**************************************";
log.info(config);
}

47
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/exceptions/ExecuteException.java

@ -1,47 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.exceptions;
/**
* execute exception
*/
public class ExecuteException extends Exception {
public ExecuteException() {
super();
}
public ExecuteException(String message) {
super(message);
}
public ExecuteException(String message, Throwable cause) {
super(message, cause);
}
public ExecuteException(Throwable cause) {
super(cause);
}
protected ExecuteException(String message, Throwable cause,
boolean enableSuppression,
boolean writableStackTrace) {
super(message, cause, enableSuppression, writableStackTrace);
}
}

69
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/CommonHostManager.java

@ -1,69 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.host;
import org.apache.dolphinscheduler.extract.base.utils.Host;
import org.apache.dolphinscheduler.server.master.dispatch.exceptions.WorkerGroupNotFoundException;
import org.apache.dolphinscheduler.server.master.dispatch.host.assign.HostWorker;
import org.apache.dolphinscheduler.server.master.registry.ServerNodeManager;
import org.apache.commons.collections4.CollectionUtils;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import org.springframework.beans.factory.annotation.Autowired;
/**
* common host manager
*/
public abstract class CommonHostManager implements HostManager {
/**
* server node manager
*/
@Autowired
protected ServerNodeManager serverNodeManager;
@Override
public Optional<Host> select(String workerGroup) throws WorkerGroupNotFoundException {
List<HostWorker> candidates = getWorkerCandidates(workerGroup);
if (CollectionUtils.isEmpty(candidates)) {
return Optional.empty();
}
return Optional.ofNullable(select(candidates));
}
protected abstract HostWorker select(Collection<HostWorker> nodes);
protected List<HostWorker> getWorkerCandidates(String workerGroup) throws WorkerGroupNotFoundException {
List<HostWorker> hostWorkers = new ArrayList<>();
Set<String> nodes = serverNodeManager.getWorkerGroupNodes(workerGroup);
if (CollectionUtils.isNotEmpty(nodes)) {
for (String node : nodes) {
serverNodeManager.getWorkerNodeInfo(node).ifPresent(
workerNodeInfo -> hostWorkers
.add(HostWorker.of(node, workerNodeInfo.getWorkerHostWeight(), workerGroup)));
}
}
return hostWorkers;
}
}

64
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/HostManagerConfig.java

@ -1,64 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.host;
import org.apache.dolphinscheduler.server.master.config.MasterConfig;
import org.apache.dolphinscheduler.server.master.dispatch.host.assign.HostSelector;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.config.AutowireCapableBeanFactory;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
* host manager config
*/
@Configuration
public class HostManagerConfig {
private AutowireCapableBeanFactory beanFactory;
@Autowired
private MasterConfig masterConfig;
@Autowired
public HostManagerConfig(AutowireCapableBeanFactory beanFactory) {
this.beanFactory = beanFactory;
}
@Bean
public HostManager hostManager() {
HostSelector selector = masterConfig.getHostSelector();
HostManager hostManager;
switch (selector) {
case RANDOM:
hostManager = new RandomHostManager();
break;
case ROUND_ROBIN:
hostManager = new RoundRobinHostManager();
break;
case LOWER_WEIGHT:
hostManager = new LowerWeightHostManager();
break;
default:
throw new IllegalArgumentException("unSupport selector " + selector);
}
beanFactory.autowireBean(hostManager);
return hostManager;
}
}

165
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/LowerWeightHostManager.java

@ -1,165 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.host;
import org.apache.dolphinscheduler.common.enums.ServerStatus;
import org.apache.dolphinscheduler.common.model.WorkerHeartBeat;
import org.apache.dolphinscheduler.extract.base.utils.Host;
import org.apache.dolphinscheduler.server.master.dispatch.exceptions.WorkerGroupNotFoundException;
import org.apache.dolphinscheduler.server.master.dispatch.host.assign.HostWeight;
import org.apache.dolphinscheduler.server.master.dispatch.host.assign.HostWorker;
import org.apache.dolphinscheduler.server.master.dispatch.host.assign.LowerWeightRoundRobin;
import org.apache.dolphinscheduler.server.master.registry.WorkerInfoChangeListener;
import org.apache.commons.collections4.CollectionUtils;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import javax.annotation.PostConstruct;
import lombok.extern.slf4j.Slf4j;
/**
* lower weight host manager
*/
@Slf4j
public class LowerWeightHostManager extends CommonHostManager {
/**
* selector
*/
private LowerWeightRoundRobin selector;
/**
* worker host weights
*/
private ConcurrentHashMap<String, Set<HostWeight>> workerHostWeightsMap;
private final ReentrantReadWriteLock workerGroupLock = new ReentrantReadWriteLock();
private final ReentrantReadWriteLock.ReadLock workerGroupReadLock = workerGroupLock.readLock();
private final ReentrantReadWriteLock.WriteLock workerGroupWriteLock = workerGroupLock.writeLock();
@PostConstruct
public void init() {
this.selector = new LowerWeightRoundRobin();
this.workerHostWeightsMap = new ConcurrentHashMap<>();
serverNodeManager.addWorkerInfoChangeListener(new WorkerWeightListener());
}
@Override
public Optional<Host> select(String workerGroup) throws WorkerGroupNotFoundException {
Set<HostWeight> workerHostWeights = getWorkerHostWeights(workerGroup);
if (CollectionUtils.isNotEmpty(workerHostWeights)) {
return Optional.ofNullable(selector.select(workerHostWeights).getHost());
}
return Optional.empty();
}
@Override
public HostWorker select(Collection<HostWorker> nodes) {
throw new UnsupportedOperationException("not support");
}
private class WorkerWeightListener implements WorkerInfoChangeListener {
@Override
public void notify(Map<String, Set<String>> workerGroups, Map<String, WorkerHeartBeat> workerNodeInfo) {
syncWorkerResources(workerGroups, workerNodeInfo);
}
/**
* Sync worker resource.
*
* @param workerGroupNodes worker group nodes, key is worker group, value is worker group nodes.
* @param workerNodeInfoMap worker node info map, key is worker node, value is worker info.
*/
private void syncWorkerResources(final Map<String, Set<String>> workerGroupNodes,
final Map<String, WorkerHeartBeat> workerNodeInfoMap) {
try {
Map<String, Set<HostWeight>> workerHostWeights = new HashMap<>();
for (Map.Entry<String, Set<String>> entry : workerGroupNodes.entrySet()) {
String workerGroup = entry.getKey();
Set<String> nodes = entry.getValue();
Set<HostWeight> hostWeights = new HashSet<>(nodes.size());
for (String node : nodes) {
WorkerHeartBeat heartbeat = workerNodeInfoMap.getOrDefault(node, null);
Optional<HostWeight> hostWeightOpt = getHostWeight(node, workerGroup, heartbeat);
hostWeightOpt.ifPresent(hostWeights::add);
}
if (!hostWeights.isEmpty()) {
workerHostWeights.put(workerGroup, hostWeights);
}
}
syncWorkerHostWeight(workerHostWeights);
} catch (Throwable ex) {
log.error("Sync worker resource error", ex);
}
}
private void syncWorkerHostWeight(Map<String, Set<HostWeight>> workerHostWeights) {
workerGroupWriteLock.lock();
try {
workerHostWeightsMap.clear();
workerHostWeightsMap.putAll(workerHostWeights);
} finally {
workerGroupWriteLock.unlock();
}
}
}
public Optional<HostWeight> getHostWeight(String workerAddress, String workerGroup, WorkerHeartBeat heartBeat) {
if (heartBeat == null) {
log.warn("Worker {} in WorkerGroup {} have not received the heartbeat", workerAddress, workerGroup);
return Optional.empty();
}
if (ServerStatus.BUSY == heartBeat.getServerStatus()) {
log.warn("Worker {} in workerGroup {} is Busy, heartbeat is {}", workerAddress, workerGroup, heartBeat);
return Optional.empty();
}
return Optional.of(
new HostWeight(
HostWorker.of(workerAddress, heartBeat.getWorkerHostWeight(), workerGroup),
heartBeat.getCpuUsage(),
heartBeat.getMemoryUsage(),
heartBeat.getDiskUsage(),
heartBeat.getThreadPoolUsage(),
heartBeat.getStartupTime()));
}
private Set<HostWeight> getWorkerHostWeights(String workerGroup) throws WorkerGroupNotFoundException {
workerGroupReadLock.lock();
try {
Set<HostWeight> hostWeights = workerHostWeightsMap.get(workerGroup);
if (hostWeights == null) {
throw new WorkerGroupNotFoundException("Can not find worker group " + workerGroup);
}
return hostWeights;
} finally {
workerGroupReadLock.unlock();
}
}
}

47
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/RandomHostManager.java

@ -1,47 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.host;
import org.apache.dolphinscheduler.server.master.dispatch.host.assign.HostWorker;
import org.apache.dolphinscheduler.server.master.dispatch.host.assign.RandomSelector;
import java.util.Collection;
/**
* random host manager
*/
public class RandomHostManager extends CommonHostManager {
/**
* selector
*/
private final RandomSelector selector;
/**
* set round robin
*/
public RandomHostManager() {
this.selector = new RandomSelector();
}
@Override
public HostWorker select(Collection<HostWorker> nodes) {
return selector.select(nodes);
}
}

47
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/RoundRobinHostManager.java

@ -1,47 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.host;
import org.apache.dolphinscheduler.server.master.dispatch.host.assign.HostWorker;
import org.apache.dolphinscheduler.server.master.dispatch.host.assign.RoundRobinSelector;
import java.util.Collection;
/**
* round robin host manager
*/
public class RoundRobinHostManager extends CommonHostManager {
/**
* selector
*/
private final RoundRobinSelector selector;
/**
* set round robin
*/
public RoundRobinHostManager() {
this.selector = new RoundRobinSelector();
}
@Override
public HostWorker select(Collection<HostWorker> nodes) {
return selector.select(nodes);
}
}

47
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/AbstractSelector.java

@ -1,47 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.host.assign;
import org.apache.commons.collections4.CollectionUtils;
import java.util.Collection;
/**
* AbstractSelector
*/
public abstract class AbstractSelector<T> implements Selector<T> {
@Override
public T select(Collection<T> source) {
if (CollectionUtils.isEmpty(source)) {
throw new IllegalArgumentException("Empty source.");
}
/**
* if only one , return directly
*/
if (source.size() == 1) {
return (T) source.toArray()[0];
}
return doSelect(source);
}
protected abstract T doSelect(Collection<T> source);
}

69
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/HostWeight.java

@ -1,69 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.host.assign;
import org.apache.dolphinscheduler.extract.base.utils.Constants;
import org.apache.dolphinscheduler.extract.base.utils.Host;
import lombok.Data;
@Data
public class HostWeight {
private final int THREAD_USAGE_FACTOR = 10;
private final int CPU_USAGE_FACTOR = 20;
private final int MEMORY_USAGE_FACTOR = 20;
private final int DISK_USAGE_FACTOR = 50;
private final Host host;
private final double weight;
// if the weight is small, then is will be chosen first
private double currentWeight;
public HostWeight(HostWorker hostWorker,
double cpuUsage,
double memoryUsage,
double diskUsage,
double threadPoolUsage,
long startTime) {
this.host = hostWorker;
this.weight = calculateWeight(cpuUsage, memoryUsage, diskUsage, threadPoolUsage, startTime);
this.currentWeight = this.weight;
}
private double calculateWeight(double cpuUsage,
double memoryUsage,
double diskUsage,
double threadPoolUsage,
long startTime) {
double calculatedWeight = 100 - (cpuUsage * CPU_USAGE_FACTOR + memoryUsage * MEMORY_USAGE_FACTOR
+ diskUsage * DISK_USAGE_FACTOR + threadPoolUsage * THREAD_USAGE_FACTOR);
long uptime = System.currentTimeMillis() - startTime;
if (uptime > 0 && uptime < Constants.WARM_UP_TIME) {
// If the warm-up is not over, add the weight
return calculatedWeight * Constants.WARM_UP_TIME / uptime;
}
return calculatedWeight;
}
}

77
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/HostWorker.java

@ -1,77 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.host.assign;
import org.apache.dolphinscheduler.extract.base.utils.Host;
/**
* host worker
*/
public class HostWorker extends Host {
/**
* host weight
*/
private int hostWeight;
/**
* worker group
*/
private String workerGroup;
public HostWorker(String ip, int port, int hostWeight, String workerGroup) {
super(ip, port);
this.hostWeight = hostWeight;
this.workerGroup = workerGroup;
}
public HostWorker(String address, int hostWeight, String workerGroup) {
super(address);
this.hostWeight = hostWeight;
this.workerGroup = workerGroup;
}
public int getHostWeight() {
return hostWeight;
}
public void setHostWeight(int hostWeight) {
this.hostWeight = hostWeight;
}
public String getWorkerGroup() {
return workerGroup;
}
public void setWorkerGroup(String workerGroup) {
this.workerGroup = workerGroup;
}
public static HostWorker of(String address, int hostWeight, String workerGroup) {
return new HostWorker(address, hostWeight, workerGroup);
}
@Override
public String toString() {
return "Host{"
+ "hostWeight=" + hostWeight
+ ", workerGroup='" + workerGroup + '\''
+ '}';
}
}

54
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/LowerWeightRoundRobin.java

@ -1,54 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.host.assign;
import java.util.Collection;
/**
* lower weight round robin
*/
public class LowerWeightRoundRobin extends AbstractSelector<HostWeight> {
/**
* Selects a HostWeight from a collection of HostWeight objects.
* The selection is based on the current weight of each HostWeight.
* The HostWeight with the smallest current weight is selected.
*
* @param sources A collection of HostWeight objects to select from.
* @return The selected HostWeight with the smallest current weight.
*/
@Override
public HostWeight doSelect(Collection<HostWeight> sources) {
double totalWeight = 0;
double lowWeight = 0;
HostWeight lowerNode = null;
for (HostWeight hostWeight : sources) {
totalWeight += hostWeight.getWeight();
hostWeight.setCurrentWeight(hostWeight.getCurrentWeight() + hostWeight.getWeight());
if (lowerNode == null || lowWeight > hostWeight.getCurrentWeight()) {
lowerNode = hostWeight;
lowWeight = hostWeight.getCurrentWeight();
}
}
if (lowerNode != null) {
lowerNode.setCurrentWeight(lowerNode.getCurrentWeight() + totalWeight);
}
return lowerNode;
}
}

66
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/RandomSelector.java

@ -1,66 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.host.assign;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.ThreadLocalRandom;
/**
* random selector
*/
public class RandomSelector extends AbstractSelector<HostWorker> {
/**
* This method selects a HostWorker from a collection of HostWorker objects using a weighted random algorithm.
* The selection is based on the weight of each HostWorker.
* A random number is generated and the HostWorker whose weight spans this random number is selected.
*
* @param source A collection of HostWorker objects to select from.
* @return The selected HostWorker based on the weighted random algorithm.
*/
@Override
public HostWorker doSelect(final Collection<HostWorker> source) {
List<HostWorker> hosts = new ArrayList<>(source);
int size = hosts.size();
int[] weights = new int[size];
int totalWeight = 0;
int index = 0;
for (HostWorker host : hosts) {
totalWeight += host.getHostWeight();
weights[index] = host.getHostWeight();
index++;
}
if (totalWeight > 0) {
int offset = ThreadLocalRandom.current().nextInt(totalWeight);
for (int i = 0; i < size; i++) {
offset -= weights[i];
if (offset < 0) {
return hosts.get(i);
}
}
}
return hosts.get(ThreadLocalRandom.current().nextInt(size));
}
}

148
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/RoundRobinSelector.java

@ -1,148 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.host.assign;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import org.springframework.stereotype.Service;
/**
* Smooth Weight Round Robin
*/
@Service
public class RoundRobinSelector extends AbstractSelector<HostWorker> {
private ConcurrentMap<String, ConcurrentMap<String, WeightedRoundRobin>> workGroupWeightMap =
new ConcurrentHashMap<>();
private static final int RECYCLE_PERIOD = 100000;
private AtomicBoolean updateLock = new AtomicBoolean();
protected static class WeightedRoundRobin {
private int weight;
private AtomicLong current = new AtomicLong(0);
private long lastUpdate;
int getWeight() {
return weight;
}
void setWeight(int weight) {
this.weight = weight;
current.set(0);
}
long increaseCurrent() {
return current.addAndGet(weight);
}
void sel(int total) {
current.addAndGet(-1L * total);
}
long getLastUpdate() {
return lastUpdate;
}
void setLastUpdate(long lastUpdate) {
this.lastUpdate = lastUpdate;
}
}
/**
* This method selects a HostWorker from a collection of HostWorker objects using a weighted round-robin algorithm.
* The selection is based on the current weight of each HostWorker.
* The HostWorker with the highest current weight is selected.
*
* @param source A collection of HostWorker objects to select from.
* @return The selected HostWorker with the highest current weight.
*/
@Override
public HostWorker doSelect(Collection<HostWorker> source) {
List<HostWorker> hosts = new ArrayList<>(source);
String key = hosts.get(0).getWorkerGroup();
ConcurrentMap<String, WeightedRoundRobin> map = workGroupWeightMap.get(key);
if (map == null) {
workGroupWeightMap.putIfAbsent(key, new ConcurrentHashMap<>());
map = workGroupWeightMap.get(key);
}
int totalWeight = 0;
long maxCurrent = Long.MIN_VALUE;
long now = System.currentTimeMillis();
HostWorker selectedHost = null;
WeightedRoundRobin selectWeightRoundRobin = null;
for (HostWorker host : hosts) {
String workGroupHost = host.getWorkerGroup() + host.getAddress();
WeightedRoundRobin weightedRoundRobin = map.get(workGroupHost);
int weight = host.getHostWeight();
if (weight < 0) {
weight = 0;
}
if (weightedRoundRobin == null) {
weightedRoundRobin = new WeightedRoundRobin();
// set weight
weightedRoundRobin.setWeight(weight);
map.putIfAbsent(workGroupHost, weightedRoundRobin);
weightedRoundRobin = map.get(workGroupHost);
}
if (weight != weightedRoundRobin.getWeight()) {
weightedRoundRobin.setWeight(weight);
}
long cur = weightedRoundRobin.increaseCurrent();
weightedRoundRobin.setLastUpdate(now);
if (cur > maxCurrent) {
maxCurrent = cur;
selectedHost = host;
selectWeightRoundRobin = weightedRoundRobin;
}
totalWeight += weight;
}
if (!updateLock.get() && hosts.size() != map.size() && updateLock.compareAndSet(false, true)) {
try {
ConcurrentMap<String, WeightedRoundRobin> newMap = new ConcurrentHashMap<>(map);
newMap.entrySet().removeIf(item -> now - item.getValue().getLastUpdate() > RECYCLE_PERIOD);
workGroupWeightMap.put(key, newMap);
} finally {
updateLock.set(false);
}
}
if (selectedHost != null) {
selectWeightRoundRobin.sel(totalWeight);
return selectedHost;
}
return hosts.get(0);
}
}

4
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/exception/TaskDispatchException.java → dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/exception/dispatch/TaskDispatchException.java

@ -15,7 +15,9 @@
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.exception;
package org.apache.dolphinscheduler.server.master.exception.dispatch;
import org.apache.dolphinscheduler.server.master.exception.MasterException;
public class TaskDispatchException extends MasterException {

4
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/exceptions/WorkerGroupNotFoundException.java → dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/exception/dispatch/WorkerGroupNotFoundException.java

@ -15,9 +15,9 @@
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.exceptions;
package org.apache.dolphinscheduler.server.master.exception.dispatch;
public class WorkerGroupNotFoundException extends ExecuteException {
public class WorkerGroupNotFoundException extends TaskDispatchException {
public WorkerGroupNotFoundException(String workerGroup) {
super("Cannot find worker group: " + workerGroup);

114
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterSlotManager.java

@ -1,114 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.registry;
import org.apache.dolphinscheduler.common.enums.ServerStatus;
import org.apache.dolphinscheduler.common.model.MasterHeartBeat;
import org.apache.dolphinscheduler.common.model.Server;
import org.apache.dolphinscheduler.server.master.config.MasterConfig;
import org.apache.dolphinscheduler.service.queue.MasterPriorityQueue;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
@Service
@Slf4j
public class MasterSlotManager {
@Autowired
protected ServerNodeManager serverNodeManager;
@Autowired
protected MasterConfig masterConfig;
private volatile int currentSlot = 0;
private volatile int totalSlot = 0;
public void start() {
serverNodeManager.addMasterInfoChangeListener(new MasterSlotManager.SlotChangeListener());
}
public int getSlot() {
return currentSlot;
}
public int getMasterSize() {
return totalSlot;
}
public class SlotChangeListener implements MasterInfoChangeListener {
private final Lock slotLock = new ReentrantLock();
private final MasterPriorityQueue masterPriorityQueue = new MasterPriorityQueue();
@Override
public void notify(Map<String, MasterHeartBeat> masterNodeInfo) {
List<Server> serverList = masterNodeInfo.values().stream()
.filter(heartBeat -> !heartBeat.getServerStatus().equals(ServerStatus.BUSY))
.map(this::convertHeartBeatToServer)
.collect(Collectors.toList());
syncMasterNodes(serverList);
}
/**
* sync master nodes
*/
private void syncMasterNodes(List<Server> masterNodes) {
slotLock.lock();
try {
this.masterPriorityQueue.clear();
this.masterPriorityQueue.putAll(masterNodes);
int tempCurrentSlot = masterPriorityQueue.getIndex(masterConfig.getMasterAddress());
int tempTotalSlot = masterNodes.size();
if (tempCurrentSlot < 0) {
totalSlot = 0;
currentSlot = 0;
log.warn("Current master is not in active master list");
} else if (tempCurrentSlot != currentSlot || tempTotalSlot != totalSlot) {
totalSlot = tempTotalSlot;
currentSlot = tempCurrentSlot;
log.info("Update master nodes, total master size: {}, current slot: {}", totalSlot, currentSlot);
}
} finally {
slotLock.unlock();
}
}
private Server convertHeartBeatToServer(MasterHeartBeat masterHeartBeat) {
Server server = new Server();
server.setCreateTime(new Date(masterHeartBeat.getStartupTime()));
server.setLastHeartbeatTime(new Date(masterHeartBeat.getReportTime()));
server.setId(masterHeartBeat.getProcessId());
server.setHost(masterHeartBeat.getHost());
server.setPort(masterHeartBeat.getPort());
return server;
}
}
}

374
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/ServerNodeManager.java

@ -1,374 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.registry;
import org.apache.dolphinscheduler.common.constants.Constants;
import org.apache.dolphinscheduler.common.model.MasterHeartBeat;
import org.apache.dolphinscheduler.common.model.WorkerHeartBeat;
import org.apache.dolphinscheduler.common.thread.ThreadUtils;
import org.apache.dolphinscheduler.common.utils.JSONUtils;
import org.apache.dolphinscheduler.dao.AlertDao;
import org.apache.dolphinscheduler.dao.entity.WorkerGroup;
import org.apache.dolphinscheduler.dao.mapper.WorkerGroupMapper;
import org.apache.dolphinscheduler.dao.utils.WorkerGroupUtils;
import org.apache.dolphinscheduler.registry.api.Event;
import org.apache.dolphinscheduler.registry.api.Event.Type;
import org.apache.dolphinscheduler.registry.api.RegistryClient;
import org.apache.dolphinscheduler.registry.api.SubscribeListener;
import org.apache.dolphinscheduler.registry.api.enums.RegistryNodeType;
import org.apache.dolphinscheduler.server.master.config.MasterConfig;
import org.apache.dolphinscheduler.server.master.dispatch.exceptions.WorkerGroupNotFoundException;
import org.apache.dolphinscheduler.service.alert.ListenerEventAlertManager;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.ArrayUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.Collectors;
import javax.annotation.PreDestroy;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
@Service
@Slf4j
public class ServerNodeManager implements InitializingBean {
private final ReentrantReadWriteLock workerGroupLock = new ReentrantReadWriteLock();
private final ReentrantReadWriteLock.ReadLock workerGroupReadLock = workerGroupLock.readLock();
private final ReentrantReadWriteLock.WriteLock workerGroupWriteLock = workerGroupLock.writeLock();
private final ReentrantReadWriteLock workerNodeInfoLock = new ReentrantReadWriteLock();
private final ReentrantReadWriteLock.ReadLock workerNodeInfoReadLock = workerNodeInfoLock.readLock();
private final ReentrantReadWriteLock.WriteLock workerNodeInfoWriteLock = workerNodeInfoLock.writeLock();
private final ReentrantLock masterNodeInfoLock = new ReentrantLock();
/**
* worker group nodes, workerGroup -> ips, combining registryWorkerGroupNodes and dbWorkerGroupNodes
*/
private final ConcurrentHashMap<String, Set<String>> workerGroupNodes = new ConcurrentHashMap<>();
private final Map<String, MasterHeartBeat> masterNodeInfo = new HashMap<>();
private final Map<String, WorkerHeartBeat> workerNodeInfo = new HashMap<>();
/**
* executor service
*/
private ScheduledExecutorService executorService;
@Autowired
private RegistryClient registryClient;
@Autowired
private WorkerGroupMapper workerGroupMapper;
@Autowired
private AlertDao alertDao;
@Autowired
private MasterConfig masterConfig;
@Autowired
private ListenerEventAlertManager listenerEventAlertManager;
private final List<WorkerInfoChangeListener> workerInfoChangeListeners = new ArrayList<>();
private final List<MasterInfoChangeListener> masterInfoChangeListeners = new ArrayList<>();
@Override
public void afterPropertiesSet() {
// load nodes from zookeeper
refreshNodesAndGroupMappings();
// init executor service
executorService = Executors
.newSingleThreadScheduledExecutor(ThreadUtils.newDaemonThreadFactory("ServerNodeManagerExecutor"));
executorService.scheduleWithFixedDelay(
new WorkerNodeInfoAndGroupDbSyncTask(),
0,
masterConfig.getWorkerGroupRefreshInterval().getSeconds(),
TimeUnit.SECONDS);
registryClient.subscribe(RegistryNodeType.MASTER.getRegistryPath(), new MasterDataListener());
registryClient.subscribe(RegistryNodeType.WORKER.getRegistryPath(), new WorkerDataListener());
}
class WorkerNodeInfoAndGroupDbSyncTask implements Runnable {
@Override
public void run() {
try {
// sync worker node info
refreshNodesAndGroupMappings();
} catch (Exception e) {
log.error("WorkerNodeInfoAndGroupDbSyncTask error:", e);
}
}
}
/**
* Refresh master/worker nodes and worker group mapping information
*/
private void refreshNodesAndGroupMappings() {
updateWorkerNodes();
updateWorkerGroupMappings();
notifyWorkerInfoChangeListeners();
updateMasterNodes();
notifyMasterInfoChangeListeners();
}
/**
* worker group node listener
*/
class WorkerDataListener implements SubscribeListener {
@Override
public void notify(Event event) {
final String path = event.path();
final Type type = event.type();
final String data = event.data();
if (registryClient.isWorkerPath(path)) {
try {
String[] parts = path.split("/");
final String workerAddress = parts[parts.length - 1];
// todo: update workerNodeInfo
log.debug("received subscribe event : {}", event);
if (type == Type.ADD) {
log.info("Worker: {} added, currentNode : {}", path, workerAddress);
} else if (type == Type.REMOVE) {
log.info("Worker node : {} down.", path);
removeSingleWorkerNode(workerAddress);
alertDao.sendServerStoppedAlert(1, path, "WORKER");
listenerEventAlertManager.publishServerDownListenerEvent(path, "WORKER");
} else if (type == Type.UPDATE) {
syncSingleWorkerNodeInfo(workerAddress, JSONUtils.parseObject(data, WorkerHeartBeat.class));
}
} catch (Exception ex) {
log.error("WorkerGroupListener capture data change and get data failed", ex);
}
}
}
private void syncSingleWorkerNodeInfo(String workerAddress, WorkerHeartBeat info) {
workerNodeInfoWriteLock.lock();
try {
workerNodeInfo.put(workerAddress, info);
} finally {
workerNodeInfoWriteLock.unlock();
}
}
private void removeSingleWorkerNode(String workerAddress) {
workerNodeInfoWriteLock.lock();
try {
workerNodeInfo.remove(workerAddress);
log.info("remove worker node {} from workerNodeInfo when worker server down", workerAddress);
} finally {
workerNodeInfoWriteLock.unlock();
}
}
}
class MasterDataListener implements SubscribeListener {
@Override
public void notify(Event event) {
final String path = event.path();
final Type type = event.type();
if (registryClient.isMasterPath(path)) {
try {
if (type.equals(Type.ADD)) {
log.info("master node : {} added.", path);
} else if (type.equals(Type.REMOVE)) {
log.info("master node : {} down.", path);
alertDao.sendServerStoppedAlert(1, path, "MASTER");
listenerEventAlertManager.publishServerDownListenerEvent(path, "MASTER");
}
} catch (Exception ex) {
log.error("MasterNodeListener capture data change and get data failed.", ex);
}
}
}
}
private void updateMasterNodes() {
masterNodeInfoLock.lock();
try {
masterNodeInfo.clear();
Map<String, String> masterNodeMaps = registryClient.getServerMaps(RegistryNodeType.MASTER);
for (Map.Entry<String, String> entry : masterNodeMaps.entrySet()) {
masterNodeInfo.put(entry.getKey(), JSONUtils.parseObject(entry.getValue(), MasterHeartBeat.class));
}
} catch (Exception e) {
log.error("update master nodes error", e);
} finally {
masterNodeInfoLock.unlock();
}
}
private void updateWorkerNodes() {
workerNodeInfoWriteLock.lock();
try {
Map<String, String> workerNodeMaps = registryClient.getServerMaps(RegistryNodeType.WORKER);
for (Map.Entry<String, String> entry : workerNodeMaps.entrySet()) {
String nodeAddress = entry.getKey();
WorkerHeartBeat workerHeartBeat = JSONUtils.parseObject(entry.getValue(), WorkerHeartBeat.class);
workerNodeInfo.put(nodeAddress, workerHeartBeat);
}
} finally {
workerNodeInfoWriteLock.unlock();
}
}
private void updateWorkerGroupMappings() {
List<WorkerGroup> workerGroups = workerGroupMapper.queryAllWorkerGroup();
Map<String, Set<String>> tmpWorkerGroupMappings = new HashMap<>();
try {
workerNodeInfoReadLock.lock();
for (WorkerGroup workerGroup : workerGroups) {
String workerGroupName = workerGroup.getName();
String[] workerAddresses = workerGroup.getAddrList().split(Constants.COMMA);
if (ArrayUtils.isEmpty(workerAddresses)) {
continue;
}
Set<String> activeWorkerNodes = Arrays.stream(workerAddresses)
.filter(workerNodeInfo::containsKey).collect(Collectors.toSet());
tmpWorkerGroupMappings.put(workerGroupName, activeWorkerNodes);
}
if (!tmpWorkerGroupMappings.containsKey(WorkerGroupUtils.getDefaultWorkerGroup())) {
tmpWorkerGroupMappings.put(WorkerGroupUtils.getDefaultWorkerGroup(), workerNodeInfo.keySet());
}
} finally {
workerNodeInfoReadLock.unlock();
}
workerGroupWriteLock.lock();
try {
workerGroupNodes.clear();
workerGroupNodes.putAll(tmpWorkerGroupMappings);
} finally {
workerGroupWriteLock.unlock();
}
}
public Map<String, Set<String>> getWorkerGroupNodes() {
workerGroupReadLock.lock();
try {
return Collections.unmodifiableMap(workerGroupNodes);
} finally {
workerGroupReadLock.unlock();
}
}
/**
* get worker group nodes
*
* @param workerGroup workerGroup
* @return worker nodes
*/
public Set<String> getWorkerGroupNodes(String workerGroup) throws WorkerGroupNotFoundException {
workerGroupReadLock.lock();
try {
workerGroup = WorkerGroupUtils.getWorkerGroupOrDefault(workerGroup);
Set<String> nodes = workerGroupNodes.get(workerGroup);
if (nodes == null) {
throw new WorkerGroupNotFoundException(String.format("WorkerGroup: %s is invalidated", workerGroup));
}
if (CollectionUtils.isEmpty(nodes)) {
return Collections.emptySet();
}
return Collections.unmodifiableSet(nodes);
} finally {
workerGroupReadLock.unlock();
}
}
public Map<String, WorkerHeartBeat> getWorkerNodeInfo() {
return Collections.unmodifiableMap(workerNodeInfo);
}
public Optional<WorkerHeartBeat> getWorkerNodeInfo(String workerServerAddress) {
workerNodeInfoReadLock.lock();
try {
return Optional.ofNullable(workerNodeInfo.getOrDefault(workerServerAddress, null));
} finally {
workerNodeInfoReadLock.unlock();
}
}
public Map<String, MasterHeartBeat> getMasterNodeInfo() {
return Collections.unmodifiableMap(masterNodeInfo);
}
/**
* Add the resource change listener, when the resource changed, the listener will be notified.
*
* @param listener will be trigger, when the worker node info changed.
*/
public synchronized void addWorkerInfoChangeListener(WorkerInfoChangeListener listener) {
workerInfoChangeListeners.add(listener);
}
private void notifyWorkerInfoChangeListeners() {
Map<String, Set<String>> workerGroupNodeMap = getWorkerGroupNodes();
Map<String, WorkerHeartBeat> workerNodeInfoMap = getWorkerNodeInfo();
for (WorkerInfoChangeListener listener : workerInfoChangeListeners) {
listener.notify(workerGroupNodeMap, workerNodeInfoMap);
}
}
public synchronized void addMasterInfoChangeListener(MasterInfoChangeListener listener) {
masterInfoChangeListeners.add(listener);
}
private void notifyMasterInfoChangeListeners() {
Map<String, MasterHeartBeat> masterNodeInfoMap = getMasterNodeInfo();
for (MasterInfoChangeListener listener : masterInfoChangeListeners) {
listener.notify(masterNodeInfoMap);
}
}
@PreDestroy
public void destroy() {
executorService.shutdownNow();
}
}

38
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/WorkerInfoChangeListener.java

@ -1,38 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.registry;
import org.apache.dolphinscheduler.common.model.WorkerHeartBeat;
import java.util.Map;
import java.util.Set;
/**
* The listener used in {@link ServerNodeManager} to notify the change of worker info.
*/
public interface WorkerInfoChangeListener {
/**
* Used to notify the change of worker info.
*
* @param workerGroups worker groups map, key is worker group name, value is worker address.
* @param workerNodeInfo worker node info map, key is worker address, value is worker info.
*/
void notify(Map<String, Set<String>> workerGroups, Map<String, WorkerHeartBeat> workerNodeInfo);
}

102
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/BaseTaskDispatcher.java

@ -1,102 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.runner;
import static com.google.common.base.Preconditions.checkNotNull;
import org.apache.dolphinscheduler.common.enums.TaskEventType;
import org.apache.dolphinscheduler.common.utils.DateUtils;
import org.apache.dolphinscheduler.extract.base.utils.Host;
import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext;
import org.apache.dolphinscheduler.plugin.task.api.enums.TaskExecutionStatus;
import org.apache.dolphinscheduler.server.master.config.MasterConfig;
import org.apache.dolphinscheduler.server.master.dispatch.exceptions.WorkerGroupNotFoundException;
import org.apache.dolphinscheduler.server.master.exception.TaskDispatchException;
import org.apache.dolphinscheduler.server.master.processor.queue.TaskEvent;
import org.apache.dolphinscheduler.server.master.processor.queue.TaskEventService;
import org.apache.dolphinscheduler.server.master.runner.dispatcher.TaskDispatcher;
import java.util.Date;
import java.util.Optional;
import lombok.extern.slf4j.Slf4j;
@Slf4j
public abstract class BaseTaskDispatcher implements TaskDispatcher {
protected final TaskEventService taskEventService;
protected final MasterConfig masterConfig;
protected BaseTaskDispatcher(TaskEventService taskEventService,
MasterConfig masterConfig) {
this.taskEventService = checkNotNull(taskEventService);
this.masterConfig = checkNotNull(masterConfig);
}
@Override
public void dispatchTask(TaskExecuteRunnable taskExecuteRunnable) throws TaskDispatchException {
Host taskInstanceDispatchHost;
try {
taskInstanceDispatchHost = getTaskInstanceDispatchHost(taskExecuteRunnable)
.orElseThrow(() -> new TaskDispatchException("Cannot find the host to execute task."));
} catch (WorkerGroupNotFoundException workerGroupNotFoundException) {
// todo: this is a temporary solution, we should refactor the ServerNodeManager to make sure there won't
// throw WorkerGroupNotFoundException unless the worker group is not exist in database
throw new TaskDispatchException(
"Dispatch task: " + taskExecuteRunnable.getTaskExecutionContext().getTaskName() + " failed",
workerGroupNotFoundException);
}
taskExecuteRunnable.getTaskExecutionContext().setHost(taskInstanceDispatchHost.getAddress());
doDispatch(taskExecuteRunnable);
taskExecuteRunnable.getTaskInstance().setHost(taskInstanceDispatchHost.getAddress());
log.info("Success dispatch task {} to {}.", taskExecuteRunnable.getTaskExecutionContext().getTaskName(),
taskInstanceDispatchHost.getAddress());
addDispatchEvent(taskExecuteRunnable);
}
protected abstract void doDispatch(TaskExecuteRunnable taskExecuteRunnable) throws TaskDispatchException;
protected abstract Optional<Host> getTaskInstanceDispatchHost(TaskExecuteRunnable taskExecutionContext) throws TaskDispatchException, WorkerGroupNotFoundException;
protected void addDispatchEvent(TaskExecuteRunnable taskExecuteRunnable) {
TaskExecutionContext taskExecutionContext = taskExecuteRunnable.getTaskExecutionContext();
TaskEvent taskEvent = TaskEvent.newDispatchEvent(
taskExecutionContext.getProcessInstanceId(),
taskExecutionContext.getTaskInstanceId(),
taskExecutionContext.getHost());
taskEventService.addEvent(taskEvent);
}
private void addDispatchFailedEvent(TaskExecuteRunnable taskExecuteRunnable) {
TaskExecutionContext taskExecutionContext = taskExecuteRunnable.getTaskExecutionContext();
TaskEvent taskEvent = TaskEvent.builder()
.processInstanceId(taskExecutionContext.getProcessInstanceId())
.taskInstanceId(taskExecutionContext.getTaskInstanceId())
.state(TaskExecutionStatus.FAILURE)
.logPath(taskExecutionContext.getLogPath())
.executePath(taskExecutionContext.getExecutePath())
.appIds(taskExecutionContext.getAppIds())
.processId(taskExecutionContext.getProcessId())
.varPool(taskExecutionContext.getVarPool())
.startTime(DateUtils.timeStampToDate(taskExecutionContext.getStartTime()))
.endTime(new Date())
.event(TaskEventType.RESULT)
.build();
taskEventService.addEvent(taskEvent);
}
}

42
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/GlobalTaskDispatchWaitingQueueLooper.java

@ -17,11 +17,18 @@
package org.apache.dolphinscheduler.server.master.runner;
import org.apache.dolphinscheduler.common.enums.TaskEventType;
import org.apache.dolphinscheduler.common.thread.BaseDaemonThread;
import org.apache.dolphinscheduler.common.utils.DateUtils;
import org.apache.dolphinscheduler.dao.entity.TaskInstance;
import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext;
import org.apache.dolphinscheduler.plugin.task.api.enums.TaskExecutionStatus;
import org.apache.dolphinscheduler.server.master.exception.dispatch.WorkerGroupNotFoundException;
import org.apache.dolphinscheduler.server.master.processor.queue.TaskEvent;
import org.apache.dolphinscheduler.server.master.processor.queue.TaskEventService;
import org.apache.dolphinscheduler.server.master.runner.dispatcher.TaskDispatchFactory;
import java.util.Date;
import java.util.concurrent.atomic.AtomicBoolean;
import lombok.extern.slf4j.Slf4j;
@ -39,6 +46,9 @@ public class GlobalTaskDispatchWaitingQueueLooper extends BaseDaemonThread imple
@Autowired
private TaskDispatchFactory taskDispatchFactory;
@Autowired
private TaskEventService taskEventService;
private final AtomicBoolean RUNNING_FLAG = new AtomicBoolean(false);
public GlobalTaskDispatchWaitingQueueLooper() {
@ -78,6 +88,11 @@ public class GlobalTaskDispatchWaitingQueueLooper extends BaseDaemonThread imple
return;
}
taskDispatchFactory.getTaskDispatcher(taskInstance).dispatchTask(taskExecuteRunnable);
} catch (WorkerGroupNotFoundException workerGroupNotFoundException) {
// If the worker group not found then the task will not be dispatched anymore
log.error("Dispatch Task: {} failed, will send task failed event", taskInstance.getName(),
workerGroupNotFoundException);
addDispatchFailedEvent(taskExecuteRunnable);
} catch (Exception e) {
// If dispatch failed, will put the task back to the queue
// The task will be dispatched after waiting time.
@ -98,4 +113,31 @@ public class GlobalTaskDispatchWaitingQueueLooper extends BaseDaemonThread imple
log.error("GlobalTaskDispatchWaitingQueueLooper is not started");
}
}
private void addDispatchSuccessEvent(TaskExecuteRunnable taskExecuteRunnable) {
TaskExecutionContext taskExecutionContext = taskExecuteRunnable.getTaskExecutionContext();
TaskEvent taskEvent = TaskEvent.newDispatchEvent(
taskExecutionContext.getProcessInstanceId(),
taskExecutionContext.getTaskInstanceId(),
taskExecutionContext.getHost());
taskEventService.addEvent(taskEvent);
}
private void addDispatchFailedEvent(TaskExecuteRunnable taskExecuteRunnable) {
TaskExecutionContext taskExecutionContext = taskExecuteRunnable.getTaskExecutionContext();
TaskEvent taskEvent = TaskEvent.builder()
.processInstanceId(taskExecutionContext.getProcessInstanceId())
.taskInstanceId(taskExecutionContext.getTaskInstanceId())
.state(TaskExecutionStatus.FAILURE)
.logPath(taskExecutionContext.getLogPath())
.executePath(taskExecutionContext.getExecutePath())
.appIds(taskExecutionContext.getAppIds())
.processId(taskExecutionContext.getProcessId())
.varPool(taskExecutionContext.getVarPool())
.startTime(DateUtils.timeStampToDate(taskExecutionContext.getStartTime()))
.endTime(new Date())
.event(TaskEventType.RESULT)
.build();
taskEventService.addEvent(taskEvent);
}
}

28
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteContextFactory.java

@ -17,7 +17,6 @@
package org.apache.dolphinscheduler.server.master.runner;
import org.apache.dolphinscheduler.common.enums.SlotCheckState;
import org.apache.dolphinscheduler.dao.entity.Command;
import org.apache.dolphinscheduler.dao.entity.ProcessDefinition;
import org.apache.dolphinscheduler.dao.entity.ProcessInstance;
@ -25,7 +24,6 @@ import org.apache.dolphinscheduler.server.master.config.MasterConfig;
import org.apache.dolphinscheduler.server.master.graph.IWorkflowGraph;
import org.apache.dolphinscheduler.server.master.graph.WorkflowGraphFactory;
import org.apache.dolphinscheduler.server.master.metrics.ProcessInstanceMetrics;
import org.apache.dolphinscheduler.server.master.registry.MasterSlotManager;
import org.apache.dolphinscheduler.service.exceptions.CronParseException;
import org.apache.dolphinscheduler.service.process.ProcessService;
@ -40,9 +38,6 @@ import org.springframework.stereotype.Component;
@Component
public class WorkflowExecuteContextFactory {
@Autowired
private MasterSlotManager masterSlotManager;
@Autowired
private ProcessService processService;
@ -69,33 +64,10 @@ public class WorkflowExecuteContextFactory {
private Optional<ProcessInstance> createWorkflowInstance(Command command) throws CronParseException {
long commandTransformStartTime = System.currentTimeMillis();
// Note: this check is not safe, the slot may change after command transform.
// We use the database transaction in `handleCommand` so that we can guarantee the command will
// always be executed
// by only one master
SlotCheckState slotCheckState = slotCheck(command);
if (slotCheckState.equals(SlotCheckState.CHANGE) || slotCheckState.equals(SlotCheckState.INJECT)) {
log.info("Master handle command {} skip, slot check state: {}", command.getId(), slotCheckState);
throw new RuntimeException("Slot check failed the current state: " + slotCheckState);
}
ProcessInstance processInstance = processService.handleCommand(masterConfig.getMasterAddress(), command);
ProcessInstanceMetrics
.recordProcessInstanceGenerateTime(System.currentTimeMillis() - commandTransformStartTime);
return Optional.ofNullable(processInstance);
}
private SlotCheckState slotCheck(Command command) {
int slot = masterSlotManager.getSlot();
int masterSize = masterSlotManager.getMasterSize();
SlotCheckState state;
if (masterSize <= 0) {
state = SlotCheckState.CHANGE;
} else if (command.getId() % masterSize == slot) {
state = SlotCheckState.PASS;
} else {
state = SlotCheckState.INJECT;
}
return state;
}
}

56
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/dispatcher/BaseTaskDispatcher.java

@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.runner.dispatcher;
import org.apache.dolphinscheduler.extract.base.utils.Host;
import org.apache.dolphinscheduler.server.master.exception.dispatch.TaskDispatchException;
import org.apache.dolphinscheduler.server.master.exception.dispatch.WorkerGroupNotFoundException;
import org.apache.dolphinscheduler.server.master.runner.TaskExecuteRunnable;
import java.util.Optional;
import lombok.extern.slf4j.Slf4j;
@Slf4j
public abstract class BaseTaskDispatcher implements TaskDispatcher {
@Override
public void dispatchTask(TaskExecuteRunnable taskExecuteRunnable) throws TaskDispatchException {
String taskName = taskExecuteRunnable.getTaskExecutionContext().getTaskName();
String taskInstanceDispatchAddress;
try {
taskInstanceDispatchAddress = getTaskInstanceDispatchHost(taskExecuteRunnable)
.map(Host::getAddress)
.orElseThrow(() -> new TaskDispatchException("Cannot find the host to execute task: " + taskName));
} catch (WorkerGroupNotFoundException workerGroupNotFoundException) {
// todo: this is a temporary solution, we should refactor the ServerNodeManager to make sure there won't
// throw WorkerGroupNotFoundException unless the worker group is not exist in database
throw new TaskDispatchException("Dispatch task: " + taskName + " failed", workerGroupNotFoundException);
}
taskExecuteRunnable.getTaskExecutionContext().setHost(taskInstanceDispatchAddress);
// todo: add dispatch address here to avoid set host in TaskExecuteRunnable before
doDispatch(taskExecuteRunnable);
taskExecuteRunnable.getTaskInstance().setHost(taskInstanceDispatchAddress);
log.info("Success dispatch task {} to {}.", taskName, taskInstanceDispatchAddress);
}
protected abstract void doDispatch(TaskExecuteRunnable taskExecuteRunnable) throws TaskDispatchException;
protected abstract Optional<Host> getTaskInstanceDispatchHost(TaskExecuteRunnable taskExecutionContext) throws TaskDispatchException, WorkerGroupNotFoundException;
}

10
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/dispatcher/MasterTaskDispatcher.java

@ -24,9 +24,7 @@ import org.apache.dolphinscheduler.extract.master.transportor.LogicTaskDispatchR
import org.apache.dolphinscheduler.extract.master.transportor.LogicTaskDispatchResponse;
import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext;
import org.apache.dolphinscheduler.server.master.config.MasterConfig;
import org.apache.dolphinscheduler.server.master.exception.TaskDispatchException;
import org.apache.dolphinscheduler.server.master.processor.queue.TaskEventService;
import org.apache.dolphinscheduler.server.master.runner.BaseTaskDispatcher;
import org.apache.dolphinscheduler.server.master.exception.dispatch.TaskDispatchException;
import org.apache.dolphinscheduler.server.master.runner.TaskExecuteRunnable;
import java.util.Optional;
@ -41,10 +39,8 @@ public class MasterTaskDispatcher extends BaseTaskDispatcher {
private final Optional<Host> masterTaskExecuteHost;
public MasterTaskDispatcher(TaskEventService taskEventService,
MasterConfig masterConfig) {
super(taskEventService, masterConfig);
masterTaskExecuteHost = Optional.of(Host.of(masterConfig.getMasterAddress()));
public MasterTaskDispatcher(MasterConfig masterConfig) {
this.masterTaskExecuteHost = Optional.of(Host.of(masterConfig.getMasterAddress()));
}
@Override

4
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/dispatcher/TaskDispatcher.java

@ -17,8 +17,8 @@
package org.apache.dolphinscheduler.server.master.runner.dispatcher;
import org.apache.dolphinscheduler.server.master.dispatch.exceptions.WorkerGroupNotFoundException;
import org.apache.dolphinscheduler.server.master.exception.TaskDispatchException;
import org.apache.dolphinscheduler.server.master.exception.dispatch.TaskDispatchException;
import org.apache.dolphinscheduler.server.master.exception.dispatch.WorkerGroupNotFoundException;
import org.apache.dolphinscheduler.server.master.runner.TaskExecuteRunnable;
/**

35
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/dispatcher/WorkerTaskDispatcher.java

@ -25,12 +25,8 @@ import org.apache.dolphinscheduler.extract.worker.ITaskInstanceOperator;
import org.apache.dolphinscheduler.extract.worker.transportor.TaskInstanceDispatchRequest;
import org.apache.dolphinscheduler.extract.worker.transportor.TaskInstanceDispatchResponse;
import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext;
import org.apache.dolphinscheduler.server.master.config.MasterConfig;
import org.apache.dolphinscheduler.server.master.dispatch.exceptions.WorkerGroupNotFoundException;
import org.apache.dolphinscheduler.server.master.dispatch.host.HostManager;
import org.apache.dolphinscheduler.server.master.exception.TaskDispatchException;
import org.apache.dolphinscheduler.server.master.processor.queue.TaskEventService;
import org.apache.dolphinscheduler.server.master.runner.BaseTaskDispatcher;
import org.apache.dolphinscheduler.server.master.cluster.loadbalancer.IWorkerLoadBalancer;
import org.apache.dolphinscheduler.server.master.exception.dispatch.TaskDispatchException;
import org.apache.dolphinscheduler.server.master.runner.TaskExecuteRunnable;
import java.util.Optional;
@ -43,39 +39,36 @@ import org.springframework.stereotype.Component;
@Component
public class WorkerTaskDispatcher extends BaseTaskDispatcher {
private final HostManager hostManager;
private final IWorkerLoadBalancer workerLoadBalancer;
public WorkerTaskDispatcher(TaskEventService taskEventService,
MasterConfig masterConfig,
HostManager hostManager) {
super(taskEventService, masterConfig);
this.hostManager = checkNotNull(hostManager);
public WorkerTaskDispatcher(IWorkerLoadBalancer workerLoadBalancer) {
this.workerLoadBalancer = checkNotNull(workerLoadBalancer);
}
@Override
protected void doDispatch(TaskExecuteRunnable taskExecuteRunnable) throws TaskDispatchException {
TaskExecutionContext taskExecutionContext = taskExecuteRunnable.getTaskExecutionContext();
final TaskExecutionContext taskExecutionContext = taskExecuteRunnable.getTaskExecutionContext();
final String taskName = taskExecutionContext.getTaskName();
final String workerAddress = taskExecutionContext.getHost();
try {
ITaskInstanceOperator taskInstanceOperator = SingletonJdkDynamicRpcClientProxyFactory
.getProxyClient(taskExecutionContext.getHost(), ITaskInstanceOperator.class);
.getProxyClient(workerAddress, ITaskInstanceOperator.class);
TaskInstanceDispatchResponse taskInstanceDispatchResponse = taskInstanceOperator
.dispatchTask(new TaskInstanceDispatchRequest(taskExecuteRunnable.getTaskExecutionContext()));
if (!taskInstanceDispatchResponse.isDispatchSuccess()) {
throw new TaskDispatchException(String.format("Dispatch task to %s failed, response is: %s",
taskExecutionContext.getHost(), taskInstanceDispatchResponse));
throw new TaskDispatchException("Dispatch task: " + taskName + " to " + workerAddress + " failed: "
+ taskInstanceDispatchResponse);
}
} catch (TaskDispatchException e) {
throw e;
} catch (Exception e) {
throw new TaskDispatchException(String.format("Dispatch task to %s failed",
taskExecutionContext.getHost()), e);
throw new TaskDispatchException("Dispatch task: " + taskName + " to " + workerAddress + " failed", e);
}
}
@Override
protected Optional<Host> getTaskInstanceDispatchHost(TaskExecuteRunnable taskExecuteRunnable) throws WorkerGroupNotFoundException {
protected Optional<Host> getTaskInstanceDispatchHost(TaskExecuteRunnable taskExecuteRunnable) {
String workerGroup = taskExecuteRunnable.getTaskExecutionContext().getWorkerGroup();
return hostManager.select(workerGroup);
return workerLoadBalancer.select(workerGroup).map(Host::of);
}
}

21
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/Selector.java → dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/utils/MasterThreadFactory.java

@ -15,20 +15,15 @@
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.host.assign;
package org.apache.dolphinscheduler.server.master.utils;
import java.util.Collection;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
/**
* selector
* @param <T> T
*/
public interface Selector<T> {
public class MasterThreadFactory {
public static ScheduledExecutorService getDefaultSchedulerThreadExecutor() {
return Executors.newSingleThreadScheduledExecutor();
}
/**
* select
* @param source source, the given source should not be empty.
* @return T
*/
T select(Collection<T> source);
}

12
dolphinscheduler-master/src/main/resources/application.yaml

@ -88,10 +88,6 @@ master:
pre-exec-threads: 10
# master execute thread number to limit process instances in parallel
exec-threads: 100
# master dispatch task number per batch, if all the tasks dispatch failed in a batch, will sleep 1s.
dispatch-task-number: 3
# master host selector to select a suitable worker, default value: LowerWeight. Optional values include random, round_robin, lower_weight
host-selector: lower_weight
# master heartbeat interval
max-heartbeat-interval: 10s
# master commit task retry times
@ -127,6 +123,14 @@ master:
id-step: 1
# master fetch command num
fetch-size: 10
worker-load-balancer-configuration-properties:
# RANDOM, ROUND_ROBIN, FIXED_WEIGHTED_ROUND_ROBIN, DYNAMIC_WEIGHTED_ROUND_ROBIN
type: DYNAMIC_WEIGHTED_ROUND_ROBIN
# dynamic-weight-config-properties only used in DYNAMIC_WEIGHTED_ROUND_ROBIN, the weight of memory-usage, cpu-usage, task-thread-pool-usage should sum to 100.
dynamic-weight-config-properties:
memory-usage-weight: 30
cpu-usage-weight: 30
task-thread-pool-usage-weight: 40
server:
port: 5679

106
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/MasterClustersTest.java

@ -0,0 +1,106 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster;
import static com.google.common.truth.Truth.assertThat;
import org.apache.dolphinscheduler.common.enums.ServerStatus;
import java.util.concurrent.atomic.AtomicBoolean;
import org.junit.jupiter.api.Test;
class MasterClustersTest {
@Test
void getServers() {
MasterClusters masterClusters = new MasterClusters();
MasterServerMetadata normalMasterServerMetadata = getNormalMasterServerMetadata();
MasterServerMetadata busyMasterServerMetadata = getBusyMasterServerMetadata();
masterClusters.onServerAdded(normalMasterServerMetadata);
masterClusters.onServerAdded(busyMasterServerMetadata);
assertThat(masterClusters.getServers()).containsExactly(normalMasterServerMetadata, busyMasterServerMetadata);
}
@Test
void getNormalServers() {
MasterClusters masterClusters = new MasterClusters();
MasterServerMetadata normalMasterServerMetadata = getNormalMasterServerMetadata();
MasterServerMetadata busyMasterServerMetadata = getBusyMasterServerMetadata();
masterClusters.onServerAdded(normalMasterServerMetadata);
masterClusters.onServerAdded(busyMasterServerMetadata);
assertThat(masterClusters.getNormalServers()).containsExactly(normalMasterServerMetadata);
}
@Test
void registerListener() {
MasterClusters masterClusters = new MasterClusters();
AtomicBoolean addServerFlag = new AtomicBoolean(false);
masterClusters.registerListener(
(IClusters.ServerAddedListener<MasterServerMetadata>) server -> addServerFlag.set(true));
masterClusters.onServerAdded(getNormalMasterServerMetadata());
assertThat(addServerFlag.get()).isTrue();
}
@Test
void onServerAdded() {
MasterClusters masterClusters = new MasterClusters();
masterClusters.onServerAdded(getNormalMasterServerMetadata());
assertThat(masterClusters.getServers()).containsExactly(getNormalMasterServerMetadata());
}
@Test
void onServerRemove() {
MasterClusters masterClusters = new MasterClusters();
masterClusters.onServerAdded(getNormalMasterServerMetadata());
masterClusters.onServerRemove(getNormalMasterServerMetadata());
assertThat(masterClusters.getServers()).isEmpty();
}
@Test
void onServerUpdate() {
MasterClusters masterClusters = new MasterClusters();
masterClusters.onServerAdded(getNormalMasterServerMetadata());
MasterServerMetadata updatedMasterServerMetadata = MasterServerMetadata.builder()
.address("127.0.0.1:5679")
.cpuUsage(0.2)
.memoryUsage(0.4)
.serverStatus(ServerStatus.BUSY)
.build();
masterClusters.onServerUpdate(updatedMasterServerMetadata);
assertThat(masterClusters.getServers()).containsExactly(updatedMasterServerMetadata);
}
private MasterServerMetadata getNormalMasterServerMetadata() {
return MasterServerMetadata.builder()
.address("127.0.0.1:5679")
.cpuUsage(0.2)
.memoryUsage(0.4)
.serverStatus(ServerStatus.NORMAL)
.build();
}
private MasterServerMetadata getBusyMasterServerMetadata() {
return MasterServerMetadata.builder()
.address("127.0.0.2:1235")
.cpuUsage(0.8)
.memoryUsage(0.8)
.serverStatus(ServerStatus.BUSY)
.build();
}
}

50
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/MasterServerMetadataTest.java

@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster;
import org.apache.dolphinscheduler.common.enums.ServerStatus;
import org.apache.dolphinscheduler.common.model.MasterHeartBeat;
import org.junit.jupiter.api.Test;
import com.google.common.truth.Truth;
class MasterServerMetadataTest {
@Test
void parseFromHeartBeat() {
MasterHeartBeat masterHeartBeat = MasterHeartBeat.builder()
.processId(30001)
.startupTime(System.currentTimeMillis())
.reportTime(System.currentTimeMillis())
.jvmCpuUsage(0.1)
.cpuUsage(0.2)
.jvmMemoryUsage(0.3)
.memoryUsage(0.4)
.diskUsage(0.5)
.serverStatus(ServerStatus.NORMAL)
.host("localhost")
.port(12345)
.build();
MasterServerMetadata masterServer = MasterServerMetadata.parseFromHeartBeat(masterHeartBeat);
Truth.assertThat(masterServer.getCpuUsage()).isEqualTo(0.2);
Truth.assertThat(masterServer.getMemoryUsage()).isEqualTo(0.4);
Truth.assertThat(masterServer.getServerStatus()).isEqualTo(ServerStatus.NORMAL);
Truth.assertThat(masterServer.getAddress()).isEqualTo("localhost:12345");
}
}

106
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/MasterSlotManagerTest.java

@ -0,0 +1,106 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster;
import static com.google.common.truth.Truth.assertThat;
import org.apache.dolphinscheduler.common.enums.ServerStatus;
import org.apache.dolphinscheduler.server.master.config.MasterConfig;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
class MasterSlotManagerTest {
private MasterSlotManager masterSlotManager;
private ClusterManager clusterManager;
private MasterConfig masterConfig;
@BeforeEach
public void setUp() {
clusterManager = new ClusterManager();
masterConfig = new MasterConfig();
masterConfig.setMasterAddress("127.0.0.1:5678");
masterSlotManager = new MasterSlotManager(clusterManager, masterConfig);
MasterServerMetadata master1 = MasterServerMetadata.builder()
.cpuUsage(0.2)
.memoryUsage(0.4)
.serverStatus(ServerStatus.NORMAL)
.address(masterConfig.getMasterAddress())
.build();
MasterServerMetadata master2 = MasterServerMetadata.builder()
.cpuUsage(0.2)
.memoryUsage(0.4)
.serverStatus(ServerStatus.NORMAL)
.address("127.0.0.2:5679")
.build();
MasterServerMetadata master3 = MasterServerMetadata.builder()
.cpuUsage(0.2)
.memoryUsage(0.4)
.serverStatus(ServerStatus.NORMAL)
.address("127.0.0.3:5679")
.build();
MasterServerMetadata master4 = MasterServerMetadata.builder()
.cpuUsage(0.2)
.memoryUsage(0.4)
.serverStatus(ServerStatus.BUSY)
.address("127.0.0.4:5679")
.build();
clusterManager.getMasterClusters().onServerAdded(master1);
clusterManager.getMasterClusters().onServerAdded(master2);
clusterManager.getMasterClusters().onServerAdded(master3);
clusterManager.getMasterClusters().onServerAdded(master4);
}
@Test
void getCurrentMasterSlot() {
assertThat(masterSlotManager.getCurrentMasterSlot()).isAtLeast(0);
}
@Test
void getTotalMasterSlots() {
assertThat(masterSlotManager.getTotalMasterSlots()).isEqualTo(3);
}
@Test
void checkSlotValid() {
assertThat(masterSlotManager.checkSlotValid()).isTrue();
}
@Test
void doReBalance() {
MasterServerMetadata master2 = MasterServerMetadata.builder()
.cpuUsage(0.2)
.memoryUsage(0.4)
.serverStatus(ServerStatus.NORMAL)
.address("127.0.0.3:5679")
.build();
MasterServerMetadata master3 = MasterServerMetadata.builder()
.cpuUsage(0.2)
.memoryUsage(0.4)
.serverStatus(ServerStatus.BUSY)
.address("127.0.0.4:5679")
.build();
clusterManager.getMasterClusters().onServerRemove(master2);
clusterManager.getMasterClusters().onServerRemove(master3);
// After doReBalance, the total master slots should be 2
assertThat(masterSlotManager.getTotalMasterSlots()).isEqualTo(2);
}
}

167
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/WorkerClustersTest.java

@ -0,0 +1,167 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster;
import static com.google.common.truth.Truth.assertThat;
import org.apache.dolphinscheduler.common.enums.ServerStatus;
import org.apache.dolphinscheduler.dao.entity.WorkerGroup;
import org.junit.jupiter.api.Test;
import com.google.common.collect.Lists;
import com.google.common.truth.Truth;
class WorkerClustersTest {
@Test
void testOnWorkerGroupDelete() {
WorkerClusters workerClusters = new WorkerClusters();
WorkerServerMetadata normalWorkerServerMetadata = getNormalWorkerServerMetadata();
workerClusters.onServerAdded(normalWorkerServerMetadata);
WorkerGroup workerGroup = WorkerGroup.builder()
.name("flinkCluster")
.addrList(normalWorkerServerMetadata.getAddress())
.build();
workerClusters.onWorkerGroupAdd(Lists.newArrayList(workerGroup));
assertThat(workerClusters.getWorkerServerAddressByGroup("flinkCluster"))
.containsExactly(normalWorkerServerMetadata.getAddress());
workerClusters.onWorkerGroupDelete(Lists.newArrayList(workerGroup));
Truth.assertThat(workerClusters.containsWorkerGroup("flinkCluster")).isFalse();
assertThat(workerClusters.getWorkerServerAddressByGroup("flinkCluster")).isEmpty();
}
@Test
void testOnWorkerGroupAdd() {
WorkerClusters workerClusters = new WorkerClusters();
WorkerServerMetadata normalWorkerServerMetadata = getNormalWorkerServerMetadata();
workerClusters.onServerAdded(normalWorkerServerMetadata);
WorkerGroup workerGroup = WorkerGroup.builder()
.name("flinkCluster")
.addrList(normalWorkerServerMetadata.getAddress())
.build();
workerClusters.onWorkerGroupAdd(Lists.newArrayList(workerGroup));
assertThat(workerClusters.getWorkerServerAddressByGroup("flinkCluster"))
.containsExactly(normalWorkerServerMetadata.getAddress());
}
@Test
void testOnWorkerGroupChange() {
WorkerClusters workerClusters = new WorkerClusters();
WorkerServerMetadata normalWorkerServerMetadata = getNormalWorkerServerMetadata();
workerClusters.onServerAdded(normalWorkerServerMetadata);
WorkerGroup workerGroup = WorkerGroup.builder()
.name("flinkCluster")
.addrList(normalWorkerServerMetadata.getAddress())
.build();
workerClusters.onWorkerGroupAdd(Lists.newArrayList(workerGroup));
assertThat(workerClusters.getWorkerServerAddressByGroup("flinkCluster"))
.containsExactly(normalWorkerServerMetadata.getAddress());
WorkerGroup updatedWorkerGroup = WorkerGroup.builder()
.name("flinkCluster")
.addrList("")
.build();
workerClusters.onWorkerGroupChange(Lists.newArrayList(updatedWorkerGroup));
assertThat(workerClusters.getWorkerServerAddressByGroup("flinkCluster")).isEmpty();
assertThat(workerClusters.containsWorkerGroup("flinkCluster")).isTrue();
}
@Test
void testOnServerAdded() {
WorkerServerMetadata normalWorkerServerMetadata = getNormalWorkerServerMetadata();
WorkerServerMetadata busyWorkerServerMetadata = getBusyWorkerServerMetadata();
WorkerClusters workerClusters = new WorkerClusters();
workerClusters.onServerAdded(normalWorkerServerMetadata);
workerClusters.onServerAdded(busyWorkerServerMetadata);
assertThat(workerClusters.getWorkerServerAddressByGroup("default"))
.containsExactly(normalWorkerServerMetadata.getAddress(), busyWorkerServerMetadata.getAddress());
assertThat(workerClusters.getNormalWorkerServerAddressByGroup("default"))
.containsExactly(normalWorkerServerMetadata.getAddress());
}
@Test
void testOnServerRemove() {
WorkerServerMetadata normalWorkerServerMetadata = getNormalWorkerServerMetadata();
WorkerServerMetadata busyWorkerServerMetadata = getBusyWorkerServerMetadata();
WorkerClusters workerClusters = new WorkerClusters();
workerClusters.onServerAdded(normalWorkerServerMetadata);
workerClusters.onServerAdded(busyWorkerServerMetadata);
workerClusters.onServerRemove(busyWorkerServerMetadata);
assertThat(workerClusters.getWorkerServerAddressByGroup("default"))
.containsExactly(normalWorkerServerMetadata.getAddress());
assertThat(workerClusters.getNormalWorkerServerAddressByGroup("default"))
.containsExactly(normalWorkerServerMetadata.getAddress());
}
@Test
void testOnServerUpdate() {
WorkerServerMetadata normalWorkerServerMetadata = getNormalWorkerServerMetadata();
WorkerServerMetadata busyWorkerServerMetadata = getBusyWorkerServerMetadata();
WorkerClusters workerClusters = new WorkerClusters();
workerClusters.onServerAdded(normalWorkerServerMetadata);
workerClusters.onServerAdded(busyWorkerServerMetadata);
WorkerServerMetadata workerServerMetadata = WorkerServerMetadata.builder()
.address("127.0.0.2:1235")
.cpuUsage(0.3)
.memoryUsage(0.3)
.serverStatus(ServerStatus.NORMAL)
.taskThreadPoolUsage(0.3)
.workerWeight(2)
.build();
workerClusters.onServerUpdate(workerServerMetadata);
assertThat(workerClusters.getWorkerServerAddressByGroup("default"))
.containsExactly(normalWorkerServerMetadata.getAddress(), workerServerMetadata.getAddress());
assertThat(workerClusters.getNormalWorkerServerAddressByGroup("default"))
.containsExactly(normalWorkerServerMetadata.getAddress(), workerServerMetadata.getAddress());
assertThat(workerClusters.getServers()).containsExactly(normalWorkerServerMetadata, workerServerMetadata);
}
private WorkerServerMetadata getNormalWorkerServerMetadata() {
return WorkerServerMetadata.builder()
.address("127.0.0.1:1235")
.cpuUsage(0.2)
.memoryUsage(0.4)
.serverStatus(ServerStatus.NORMAL)
.taskThreadPoolUsage(0.6)
.workerWeight(2)
.build();
}
private WorkerServerMetadata getBusyWorkerServerMetadata() {
return WorkerServerMetadata.builder()
.address("127.0.0.2:1235")
.cpuUsage(0.8)
.memoryUsage(0.8)
.serverStatus(ServerStatus.BUSY)
.taskThreadPoolUsage(0.9)
.workerWeight(2)
.build();
}
}

158
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/WorkerGroupChangeNotifierTest.java

@ -0,0 +1,158 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster;
import static com.google.common.truth.Truth.assertThat;
import static org.mockito.Mockito.when;
import org.apache.dolphinscheduler.dao.entity.WorkerGroup;
import org.apache.dolphinscheduler.dao.repository.WorkerGroupDao;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;
import com.google.common.collect.Lists;
class WorkerGroupChangeNotifierTest {
@Test
void detectWorkerGroupChanges_addedWorkerGroup() {
WorkerGroupDao workerGroupDao = Mockito.mock(WorkerGroupDao.class);
WorkerGroupChangeNotifier workerGroupChangeNotifier = new WorkerGroupChangeNotifier(workerGroupDao);
WorkerGroup workerGroup1 = WorkerGroup.builder()
.name("workerGroup1")
.addrList("")
.build();
when(workerGroupDao.queryAll()).thenReturn(Lists.newArrayList(workerGroup1));
AtomicBoolean workerGroupAdded = new AtomicBoolean(false);
AtomicBoolean workerGroupDeleted = new AtomicBoolean(false);
AtomicBoolean workerGroupChanged = new AtomicBoolean(false);
workerGroupChangeNotifier.subscribeWorkerGroupsChange(new WorkerGroupChangeNotifier.WorkerGroupListener() {
@Override
public void onWorkerGroupDelete(List<WorkerGroup> workerGroups) {
workerGroupDeleted.set(true);
}
@Override
public void onWorkerGroupAdd(List<WorkerGroup> workerGroups) {
workerGroupAdded.set(true);
assertThat(workerGroups).containsExactly(workerGroup1);
}
@Override
public void onWorkerGroupChange(List<WorkerGroup> workerGroups) {
workerGroupChanged.set(true);
}
});
workerGroupChangeNotifier.detectWorkerGroupChanges();
assertThat(workerGroupAdded.get()).isTrue();
assertThat(workerGroupChanged.get()).isFalse();
assertThat(workerGroupDeleted.get()).isFalse();
}
@Test
void detectWorkerGroupChanges_deleteWorkerGroup() {
WorkerGroupDao workerGroupDao = Mockito.mock(WorkerGroupDao.class);
WorkerGroupChangeNotifier workerGroupChangeNotifier = new WorkerGroupChangeNotifier(workerGroupDao);
WorkerGroup workerGroup1 = WorkerGroup.builder()
.name("workerGroup1")
.addrList("")
.build();
when(workerGroupDao.queryAll()).thenReturn(Lists.newArrayList(workerGroup1));
workerGroupChangeNotifier.detectWorkerGroupChanges();
when(workerGroupDao.queryAll()).thenReturn(Lists.newArrayList());
AtomicBoolean workerGroupAdded = new AtomicBoolean(false);
AtomicBoolean workerGroupDeleted = new AtomicBoolean(false);
AtomicBoolean workerGroupChanged = new AtomicBoolean(false);
workerGroupChangeNotifier.subscribeWorkerGroupsChange(new WorkerGroupChangeNotifier.WorkerGroupListener() {
@Override
public void onWorkerGroupDelete(List<WorkerGroup> workerGroups) {
workerGroupDeleted.set(true);
assertThat(workerGroups).containsExactly(workerGroup1);
}
@Override
public void onWorkerGroupAdd(List<WorkerGroup> workerGroups) {
workerGroupAdded.set(true);
}
@Override
public void onWorkerGroupChange(List<WorkerGroup> workerGroups) {
workerGroupChanged.set(true);
}
});
workerGroupChangeNotifier.detectWorkerGroupChanges();
assertThat(workerGroupAdded.get()).isFalse();
assertThat(workerGroupChanged.get()).isFalse();
assertThat(workerGroupDeleted.get()).isTrue();
assertThat(workerGroupChangeNotifier.getWorkerGroupMap()).isEmpty();
}
@Test
void detectWorkerGroupChanges_updateWorkerGroup() {
WorkerGroupDao workerGroupDao = Mockito.mock(WorkerGroupDao.class);
WorkerGroupChangeNotifier workerGroupChangeNotifier = new WorkerGroupChangeNotifier(workerGroupDao);
WorkerGroup workerGroup1 = WorkerGroup.builder()
.name("workerGroup1")
.addrList("")
.build();
when(workerGroupDao.queryAll()).thenReturn(Lists.newArrayList(workerGroup1));
workerGroupChangeNotifier.detectWorkerGroupChanges();
WorkerGroup updatedWorkerGroup1 = WorkerGroup.builder()
.name("workerGroup1")
.addrList("127.0.0.1:1235")
.build();
when(workerGroupDao.queryAll()).thenReturn(Lists.newArrayList(updatedWorkerGroup1));
AtomicBoolean workerGroupAdded = new AtomicBoolean(false);
AtomicBoolean workerGroupDeleted = new AtomicBoolean(false);
AtomicBoolean workerGroupChanged = new AtomicBoolean(false);
workerGroupChangeNotifier.subscribeWorkerGroupsChange(new WorkerGroupChangeNotifier.WorkerGroupListener() {
@Override
public void onWorkerGroupDelete(List<WorkerGroup> workerGroups) {
workerGroupDeleted.set(true);
}
@Override
public void onWorkerGroupAdd(List<WorkerGroup> workerGroups) {
workerGroupAdded.set(true);
}
@Override
public void onWorkerGroupChange(List<WorkerGroup> workerGroups) {
workerGroupChanged.set(true);
assertThat(workerGroups).containsExactly(updatedWorkerGroup1);
}
});
workerGroupChangeNotifier.detectWorkerGroupChanges();
assertThat(workerGroupAdded.get()).isFalse();
assertThat(workerGroupChanged.get()).isTrue();
assertThat(workerGroupDeleted.get()).isFalse();
assertThat(workerGroupChangeNotifier.getWorkerGroupMap()).containsEntry("workerGroup1", updatedWorkerGroup1);
}
}

53
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/WorkerServerMetadataTest.java

@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster;
import org.apache.dolphinscheduler.common.enums.ServerStatus;
import org.apache.dolphinscheduler.common.model.WorkerHeartBeat;
import org.junit.jupiter.api.Test;
import com.google.common.truth.Truth;
class WorkerServerMetadataTest {
@Test
void parseFromHeartBeat() {
WorkerHeartBeat workerHeartBeat = WorkerHeartBeat.builder()
.processId(30001)
.startupTime(System.currentTimeMillis())
.reportTime(System.currentTimeMillis())
.jvmCpuUsage(0.1)
.cpuUsage(0.2)
.jvmMemoryUsage(0.3)
.memoryUsage(0.4)
.diskUsage(0.5)
.serverStatus(ServerStatus.NORMAL)
.host("localhost")
.port(12345)
.workerHostWeight(2)
.threadPoolUsage(0.6)
.build();
WorkerServerMetadata workerServerMetadata = WorkerServerMetadata.parseFromHeartBeat(workerHeartBeat);
Truth.assertThat(workerServerMetadata.getCpuUsage()).isEqualTo(0.2);
Truth.assertThat(workerServerMetadata.getMemoryUsage()).isEqualTo(0.4);
Truth.assertThat(workerServerMetadata.getServerStatus()).isEqualTo(ServerStatus.NORMAL);
Truth.assertThat(workerServerMetadata.getAddress()).isEqualTo("localhost:12345");
Truth.assertThat(workerServerMetadata.getWorkerWeight()).isEqualTo(2);
Truth.assertThat(workerServerMetadata.getTaskThreadPoolUsage()).isEqualTo(0.6);
}
}

82
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/BaseWorkerLoadBalancerTest.java

@ -0,0 +1,82 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster.loadbalancer;
import org.apache.dolphinscheduler.common.enums.ServerStatus;
import org.apache.dolphinscheduler.dao.entity.WorkerGroup;
import org.apache.dolphinscheduler.server.master.cluster.WorkerClusters;
import org.apache.dolphinscheduler.server.master.cluster.WorkerServerMetadata;
import java.util.List;
import com.google.common.collect.Lists;
public class BaseWorkerLoadBalancerTest {
/**
* "default" -> 127.0.0.1:1234, 127.0.0.2:1234, 127.0.0.3:1234, 127.0.0.4:1234
* "sparkCluster" -> 127.0.0.1:1234
* "flinkCluster" ->
* "busyCluster" -> 127.0.0.4:1234
*/
protected WorkerClusters getDefaultWorkerClusters() {
List<WorkerGroup> workerGroups = Lists.newArrayList(
WorkerGroup.builder()
.name("sparkCluster")
.addrList("127.0.0.1:1234")
.build(),
WorkerGroup.builder()
.name("flinkCluster")
.build(),
WorkerGroup.builder()
.name("busyCluster")
.build());
List<WorkerServerMetadata> workerServers = Lists.newArrayList(
WorkerServerMetadata.builder()
.address("127.0.0.1:1234")
.serverStatus(ServerStatus.NORMAL)
.cpuUsage(0.1)
.memoryUsage(0.1)
.build(),
WorkerServerMetadata.builder()
.address("127.0.0.2:1234")
.serverStatus(ServerStatus.NORMAL)
.cpuUsage(0.2)
.memoryUsage(0.2)
.build(),
WorkerServerMetadata.builder()
.address("127.0.0.3:1234")
.serverStatus(ServerStatus.NORMAL)
.cpuUsage(0.3)
.memoryUsage(0.3)
.build(),
WorkerServerMetadata.builder()
.address("127.0.0.4:1234")
.serverStatus(ServerStatus.BUSY)
.cpuUsage(0.8)
.memoryUsage(0.8)
.build());
WorkerClusters workerClusters = new WorkerClusters();
workerClusters.onWorkerGroupAdd(workerGroups);
workerServers.forEach(workerClusters::onServerAdded);
return workerClusters;
}
}

119
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/DynamicWeightedRoundRobinWorkerLoadBalancerTest.java

@ -0,0 +1,119 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster.loadbalancer;
import org.apache.dolphinscheduler.common.enums.ServerStatus;
import org.apache.dolphinscheduler.dao.entity.WorkerGroup;
import org.apache.dolphinscheduler.server.master.cluster.WorkerClusters;
import org.apache.dolphinscheduler.server.master.cluster.WorkerServerMetadata;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.junit.jupiter.api.Test;
import com.google.common.collect.Lists;
import com.google.common.truth.Truth;
class DynamicWeightedRoundRobinWorkerLoadBalancerTest {
@Test
void select_defaultWorkerGroup() {
DynamicWeightedRoundRobinWorkerLoadBalancer loadBalancer = createDynamicWeightedRoundRobinWorkerLoadBalancer();
// select 10000 times to check if the selected worker is in the worker list
Map<String, Integer> workerSelectedCount = new HashMap<>();
for (int i = 0; i < 10000; i++) {
Optional<String> selectedWorker = loadBalancer.select("default");
Truth.assertThat(selectedWorker).isPresent();
workerSelectedCount.put(selectedWorker.get(),
workerSelectedCount.getOrDefault(selectedWorker.get(), 0) + 1);
}
// Assert the three workers selected times are similar
Integer times1 = workerSelectedCount.get("127.0.0.1:1234");
Integer times2 = workerSelectedCount.get("127.0.0.2:1234");
Integer times3 = workerSelectedCount.get("127.0.0.3:1234");
Truth.assertThat(Math.abs(times1 - times2 * 2)).isWithin(10);
Truth.assertThat(Math.abs(times1 - times3 * 3)).isWithin(10);
Truth.assertThat(times1 + times2 + times3).isEqualTo(10000);
}
@Test
void select_emptyWorkerGroup() {
DynamicWeightedRoundRobinWorkerLoadBalancer loadBalancer = createDynamicWeightedRoundRobinWorkerLoadBalancer();
Truth.assertThat(loadBalancer.select("busyCluster")).isEmpty();
}
@Test
void select_workerGroupNotExist() {
DynamicWeightedRoundRobinWorkerLoadBalancer loadBalancer = createDynamicWeightedRoundRobinWorkerLoadBalancer();
Truth.assertThat(loadBalancer.select("notExistCluster")).isEmpty();
}
private DynamicWeightedRoundRobinWorkerLoadBalancer createDynamicWeightedRoundRobinWorkerLoadBalancer() {
List<WorkerGroup> workerGroups = Lists.newArrayList(
WorkerGroup.builder()
.name("sparkCluster")
.addrList("127.0.0.1:1234")
.build(),
WorkerGroup.builder()
.name("flinkCluster")
.build(),
WorkerGroup.builder()
.name("busyCluster")
.build());
List<WorkerServerMetadata> workerServers = Lists.newArrayList(
WorkerServerMetadata.builder()
.address("127.0.0.1:1234")
.serverStatus(ServerStatus.NORMAL)
.cpuUsage(0.1)
.memoryUsage(0.1)
.taskThreadPoolUsage(0.1)
.build(),
WorkerServerMetadata.builder()
.address("127.0.0.2:1234")
.serverStatus(ServerStatus.NORMAL)
.cpuUsage(0.2)
.memoryUsage(0.2)
.taskThreadPoolUsage(0.2)
.build(),
WorkerServerMetadata.builder()
.address("127.0.0.3:1234")
.serverStatus(ServerStatus.NORMAL)
.cpuUsage(0.3)
.memoryUsage(0.3)
.taskThreadPoolUsage(0.3)
.build(),
WorkerServerMetadata.builder()
.address("127.0.0.4:1234")
.serverStatus(ServerStatus.BUSY)
.cpuUsage(0.8)
.memoryUsage(0.8)
.build());
WorkerClusters workerClusters = new WorkerClusters();
DynamicWeightedRoundRobinWorkerLoadBalancer dynamicWeightedRoundRobinWorkerLoadBalancer =
new DynamicWeightedRoundRobinWorkerLoadBalancer(workerClusters,
new WorkerLoadBalancerConfigurationProperties.DynamicWeightConfigProperties());
workerClusters.onWorkerGroupChange(workerGroups);
workerServers.forEach(workerClusters::onServerAdded);
return dynamicWeightedRoundRobinWorkerLoadBalancer;
}
}

118
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/FixedWeightedRoundRobinWorkerLoadBalancerTest.java

@ -0,0 +1,118 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster.loadbalancer;
import org.apache.dolphinscheduler.common.enums.ServerStatus;
import org.apache.dolphinscheduler.dao.entity.WorkerGroup;
import org.apache.dolphinscheduler.server.master.cluster.WorkerClusters;
import org.apache.dolphinscheduler.server.master.cluster.WorkerServerMetadata;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.junit.jupiter.api.Test;
import com.google.common.collect.Lists;
import com.google.common.truth.Truth;
class FixedWeightedRoundRobinWorkerLoadBalancerTest {
@Test
void select_defaultWorkerGroup() {
FixedWeightedRoundRobinWorkerLoadBalancer loadBalancer = createFixedWeightedRoundRobinWorkerLoadBalancer();
// select 10000 times to check if the selected worker is in the worker list
Map<String, Integer> workerSelectedCount = new HashMap<>();
for (int i = 0; i < 10000; i++) {
Optional<String> selectedWorker = loadBalancer.select("default");
Truth.assertThat(selectedWorker).isPresent();
workerSelectedCount.put(selectedWorker.get(),
workerSelectedCount.getOrDefault(selectedWorker.get(), 0) + 1);
}
// Assert the three workers selected times are similar
Integer times1 = workerSelectedCount.get("127.0.0.1:1234");
Integer times2 = workerSelectedCount.get("127.0.0.2:1234");
Integer times3 = workerSelectedCount.get("127.0.0.3:1234");
Truth.assertThat(Math.abs(times2 - times1 * 2)).isWithin(10);
Truth.assertThat(Math.abs(times3 - times1 * 3)).isWithin(10);
Truth.assertThat(times1 + times2 + times3).isEqualTo(10000);
}
@Test
void select_emptyWorkerGroup() {
FixedWeightedRoundRobinWorkerLoadBalancer loadBalancer = createFixedWeightedRoundRobinWorkerLoadBalancer();
Truth.assertThat(loadBalancer.select("busyCluster")).isEmpty();
}
@Test
void select_workerGroupNotExist() {
FixedWeightedRoundRobinWorkerLoadBalancer loadBalancer = createFixedWeightedRoundRobinWorkerLoadBalancer();
Truth.assertThat(loadBalancer.select("notExistCluster")).isEmpty();
}
private FixedWeightedRoundRobinWorkerLoadBalancer createFixedWeightedRoundRobinWorkerLoadBalancer() {
List<WorkerGroup> workerGroups = Lists.newArrayList(
WorkerGroup.builder()
.name("sparkCluster")
.addrList("127.0.0.1:1234")
.build(),
WorkerGroup.builder()
.name("flinkCluster")
.build(),
WorkerGroup.builder()
.name("busyCluster")
.build());
List<WorkerServerMetadata> workerServers = Lists.newArrayList(
WorkerServerMetadata.builder()
.address("127.0.0.1:1234")
.serverStatus(ServerStatus.NORMAL)
.cpuUsage(0.1)
.memoryUsage(0.1)
.workerWeight(1)
.build(),
WorkerServerMetadata.builder()
.address("127.0.0.2:1234")
.serverStatus(ServerStatus.NORMAL)
.cpuUsage(0.2)
.memoryUsage(0.2)
.workerWeight(2)
.build(),
WorkerServerMetadata.builder()
.address("127.0.0.3:1234")
.serverStatus(ServerStatus.NORMAL)
.cpuUsage(0.3)
.memoryUsage(0.3)
.workerWeight(3)
.build(),
WorkerServerMetadata.builder()
.address("127.0.0.4:1234")
.serverStatus(ServerStatus.BUSY)
.cpuUsage(0.8)
.memoryUsage(0.8)
.build());
WorkerClusters workerClusters = new WorkerClusters();
FixedWeightedRoundRobinWorkerLoadBalancer loadBalancer =
new FixedWeightedRoundRobinWorkerLoadBalancer(workerClusters);
workerClusters.onWorkerGroupChange(workerGroups);
workerServers.forEach(workerClusters::onServerAdded);
return loadBalancer;
}
}

67
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/RandomWorkerLoadBalancerTest.java

@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster.loadbalancer;
import org.apache.dolphinscheduler.server.master.cluster.WorkerClusters;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import org.junit.jupiter.api.Test;
import com.google.common.truth.Truth;
class RandomWorkerLoadBalancerTest extends BaseWorkerLoadBalancerTest {
@Test
void select_defaultWorkerGroup() {
WorkerClusters defaultWorkerClusters = getDefaultWorkerClusters();
RandomWorkerLoadBalancer randomWorkerLoadBalancer = new RandomWorkerLoadBalancer(defaultWorkerClusters);
// select 10000 times to check if the selected worker is in the worker list
Map<String, Integer> workerSelectedCount = new HashMap<>();
for (int i = 0; i < 10000; i++) {
Optional<String> selectedWorker = randomWorkerLoadBalancer.select("default");
Truth.assertThat(selectedWorker).isPresent();
workerSelectedCount.put(selectedWorker.get(),
workerSelectedCount.getOrDefault(selectedWorker.get(), 0) + 1);
}
// Assert the three workers selected times are similar
Integer times1 = workerSelectedCount.get("127.0.0.1:1234");
Integer times2 = workerSelectedCount.get("127.0.0.2:1234");
Integer times3 = workerSelectedCount.get("127.0.0.3:1234");
Truth.assertThat(Math.abs(times1 - times2)).isWithin(10);
Truth.assertThat(Math.abs(times1 - times3)).isWithin(10);
Truth.assertThat(Math.abs(times2 - times3)).isWithin(10);
}
@Test
void select_emptyWorkerGroup() {
WorkerClusters defaultWorkerClusters = getDefaultWorkerClusters();
RandomWorkerLoadBalancer randomWorkerLoadBalancer = new RandomWorkerLoadBalancer(defaultWorkerClusters);
Truth.assertThat(randomWorkerLoadBalancer.select("busyCluster")).isEmpty();
}
@Test
void select_workerGroupNotExist() {
WorkerClusters defaultWorkerClusters = getDefaultWorkerClusters();
RandomWorkerLoadBalancer randomWorkerLoadBalancer = new RandomWorkerLoadBalancer(defaultWorkerClusters);
Truth.assertThat(randomWorkerLoadBalancer.select("notExistCluster")).isEmpty();
}
}

69
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/cluster/loadbalancer/RoundRobinWorkerLoadBalancerTest.java

@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.cluster.loadbalancer;
import org.apache.dolphinscheduler.server.master.cluster.WorkerClusters;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import org.junit.jupiter.api.Test;
import com.google.common.truth.Truth;
class RoundRobinWorkerLoadBalancerTest extends BaseWorkerLoadBalancerTest {
@Test
void select_defaultWorkerGroup() {
WorkerClusters defaultWorkerClusters = getDefaultWorkerClusters();
RoundRobinWorkerLoadBalancer roundRobinWorkerLoadBalancer =
new RoundRobinWorkerLoadBalancer(defaultWorkerClusters);
// select 10000 times to check if the selected worker is in the worker list
Map<String, Integer> workerSelectedCount = new HashMap<>();
for (int i = 0; i < 10000; i++) {
Optional<String> selectedWorker = roundRobinWorkerLoadBalancer.select("default");
Truth.assertThat(selectedWorker).isPresent();
workerSelectedCount.put(selectedWorker.get(),
workerSelectedCount.getOrDefault(selectedWorker.get(), 0) + 1);
}
// Assert the three workers selected times are similar
Integer times1 = workerSelectedCount.get("127.0.0.1:1234");
Integer times2 = workerSelectedCount.get("127.0.0.2:1234");
Integer times3 = workerSelectedCount.get("127.0.0.3:1234");
Truth.assertThat(Math.abs(times1 - times2)).isWithin(10);
Truth.assertThat(Math.abs(times1 - times3)).isWithin(10);
Truth.assertThat(Math.abs(times2 - times3)).isWithin(10);
}
@Test
void select_emptyWorkerGroup() {
WorkerClusters defaultWorkerClusters = getDefaultWorkerClusters();
RoundRobinWorkerLoadBalancer roundRobinWorkerLoadBalancer =
new RoundRobinWorkerLoadBalancer(defaultWorkerClusters);
Truth.assertThat(roundRobinWorkerLoadBalancer.select("busyCluster")).isEmpty();
}
@Test
void select_workerGroupNotExist() {
WorkerClusters defaultWorkerClusters = getDefaultWorkerClusters();
RoundRobinWorkerLoadBalancer roundRobinWorkerLoadBalancer =
new RoundRobinWorkerLoadBalancer(defaultWorkerClusters);
Truth.assertThat(roundRobinWorkerLoadBalancer.select("notExistCluster")).isEmpty();
}
}

23
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/config/MasterConfigTest.java

@ -21,6 +21,9 @@ import static com.google.common.truth.Truth.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import org.apache.dolphinscheduler.server.master.cluster.loadbalancer.WorkerLoadBalancerConfigurationProperties;
import org.apache.dolphinscheduler.server.master.cluster.loadbalancer.WorkerLoadBalancerType;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc;
@ -33,12 +36,6 @@ public class MasterConfigTest {
@Autowired
private MasterConfig masterConfig;
@Test
public void getMasterDispatchTaskNumber() {
int masterDispatchTaskNumber = masterConfig.getDispatchTaskNumber();
assertEquals(30, masterDispatchTaskNumber);
}
@Test
public void getServerLoadProtection() {
MasterServerLoadProtection serverLoadProtection = masterConfig.getServerLoadProtection();
@ -61,4 +58,18 @@ public class MasterConfigTest {
assertThat(idSlotBasedFetchConfig.getIdStep()).isEqualTo(3);
assertThat(idSlotBasedFetchConfig.getFetchSize()).isEqualTo(11);
}
@Test
public void getWorkerLoadBalancerConfigurationProperties() {
WorkerLoadBalancerConfigurationProperties workerLoadBalancerConfigurationProperties =
masterConfig.getWorkerLoadBalancerConfigurationProperties();
assertThat(workerLoadBalancerConfigurationProperties.getType())
.isEqualTo(WorkerLoadBalancerType.DYNAMIC_WEIGHTED_ROUND_ROBIN);
WorkerLoadBalancerConfigurationProperties.DynamicWeightConfigProperties dynamicWeightConfigProperties =
workerLoadBalancerConfigurationProperties.getDynamicWeightConfigProperties();
assertThat(dynamicWeightConfigProperties.getMemoryUsageWeight()).isEqualTo(40);
assertThat(dynamicWeightConfigProperties.getCpuUsageWeight()).isEqualTo(30);
assertThat(dynamicWeightConfigProperties.getTaskThreadPoolUsageWeight()).isEqualTo(30);
}
}

43
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/HostWorkerTest.java

@ -1,43 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.host.assign;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
public class HostWorkerTest {
@Test
public void testHostWorker1() {
HostWorker hostWorker = new HostWorker("192.158.2.2", 11, 20, "default");
Assertions.assertEquals("192.158.2.2", hostWorker.getIp());
Assertions.assertEquals(11, hostWorker.getPort());
Assertions.assertEquals(20, hostWorker.getHostWeight());
Assertions.assertEquals("default", hostWorker.getWorkerGroup());
}
@Test
public void testHostWorker2() {
HostWorker hostWorker = HostWorker.of("192.158.2.2:22", 80, "default");
Assertions.assertEquals("192.158.2.2", hostWorker.getIp());
Assertions.assertEquals(22, hostWorker.getPort());
Assertions.assertEquals(80, hostWorker.getHostWeight());
Assertions.assertEquals("default", hostWorker.getWorkerGroup());
}
}

97
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/LowerWeightRoundRobinTest.java

@ -1,97 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.host.assign;
import java.util.ArrayList;
import java.util.Collection;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
public class LowerWeightRoundRobinTest {
@Test
public void testSelect() {
Collection<HostWeight> sources = new ArrayList<>();
sources.add(new HostWeight(HostWorker.of("192.158.2.1:11", 100, "default"), 0.06, 0.44, 0.384, 0.1,
System.currentTimeMillis() - 60 * 8 * 1000));
sources.add(new HostWeight(HostWorker.of("192.158.2.2:22", 100, "default"), 0.06, 0.56, 0.324, 0.2,
System.currentTimeMillis() - 60 * 5 * 1000));
sources.add(new HostWeight(HostWorker.of("192.158.2.3:33", 100, "default"), 0.06, 0.80, 0.315, 0.1,
System.currentTimeMillis() - 60 * 2 * 1000));
LowerWeightRoundRobin roundRobin = new LowerWeightRoundRobin();
HostWeight result;
result = roundRobin.select(sources);
Assertions.assertEquals("192.158.2.1", result.getHost().getIp());
result = roundRobin.select(sources);
Assertions.assertEquals("192.158.2.2", result.getHost().getIp());
result = roundRobin.select(sources);
Assertions.assertEquals("192.158.2.1", result.getHost().getIp());
result = roundRobin.select(sources);
Assertions.assertEquals("192.158.2.2", result.getHost().getIp());
result = roundRobin.select(sources);
Assertions.assertEquals("192.158.2.1", result.getHost().getIp());
result = roundRobin.select(sources);
Assertions.assertEquals("192.158.2.2", result.getHost().getIp());
}
@Test
public void testWarmUpSelect() {
Collection<HostWeight> sources = new ArrayList<>();
sources.add(new HostWeight(HostWorker.of("192.158.2.1:11", 100, "default"), 0.06, 0.44, 0.384, 0,
System.currentTimeMillis() - 60 * 8 * 1000));
sources.add(new HostWeight(HostWorker.of("192.158.2.2:22", 100, "default"), 0.06, 0.44, 0.384, 0,
System.currentTimeMillis() - 60 * 5 * 1000));
sources.add(new HostWeight(HostWorker.of("192.158.2.3:33", 100, "default"), 0.06, 0.44, 0.384, 0,
System.currentTimeMillis() - 60 * 3 * 1000));
sources.add(new HostWeight(HostWorker.of("192.158.2.4:33", 100, "default"), 0.06, 0.44, 0.384, 0,
System.currentTimeMillis() - 60 * 11 * 1000));
LowerWeightRoundRobin roundRobin = new LowerWeightRoundRobin();
HostWeight result;
result = roundRobin.select(sources);
Assertions.assertEquals("192.158.2.4", result.getHost().getIp());
result = roundRobin.select(sources);
Assertions.assertEquals("192.158.2.1", result.getHost().getIp());
result = roundRobin.select(sources);
Assertions.assertEquals("192.158.2.2", result.getHost().getIp());
result = roundRobin.select(sources);
Assertions.assertEquals("192.158.2.4", result.getHost().getIp());
result = roundRobin.select(sources);
Assertions.assertEquals("192.158.2.1", result.getHost().getIp());
}
@Test
public void testDoSelect() {
Collection<HostWeight> sources = new ArrayList<>();
LowerWeightRoundRobin roundRobin = new LowerWeightRoundRobin();
HostWeight result;
result = roundRobin.doSelect(sources);
Assertions.assertNull(result);
sources.add(new HostWeight(HostWorker.of("192.158.2.1:11", 100, "default"), 0.06, 0.44, 0.314, 0.1,
System.currentTimeMillis() - 60 * 8 * 1000));
sources.add(new HostWeight(HostWorker.of("192.158.2.2:22", 100, "default"), 0.06, 0.56, 0.324, 0.2,
System.currentTimeMillis() - 60 * 5 * 1000));
sources.add(new HostWeight(HostWorker.of("192.158.2.3:33", 100, "default"), 0.06, 0.80, 0.315, 0.1,
System.currentTimeMillis() - 60 * 2 * 1000));
result = roundRobin.doSelect(sources);
Assertions.assertEquals("192.158.2.1", result.getHost().getIp());
}
}

53
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/RandomSelectorTest.java

@ -1,53 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.host.assign;
import java.util.Arrays;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
/**
* random selector
*/
public class RandomSelectorTest {
@Test
public void testSelectWithIllegalArgumentException() {
RandomSelector selector = new RandomSelector();
Assertions.assertThrows(IllegalArgumentException.class, () -> {
selector.select(null);
});
}
@Test
public void testSelect1() {
RandomSelector selector = new RandomSelector();
HostWorker result = selector.select(Arrays.asList(new HostWorker("192.168.1.1:11", 100, "default"),
new HostWorker("192.168.1.2:22", 80, "default")));
Assertions.assertNotNull(result);
}
@Test
public void testSelect() {
RandomSelector selector = new RandomSelector();
HostWorker result = selector.select(Arrays.asList(new HostWorker("192.168.1.1", 11, 100, "default"),
new HostWorker("192.168.1.2:", 22, 20, "default")));
Assertions.assertNotNull(result);
}
}

106
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/dispatch/host/assign/RoundRobinSelectorTest.java

@ -1,106 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.dispatch.host.assign;
import java.util.Arrays;
import java.util.List;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
/**
* round robin selector
*/
public class RoundRobinSelectorTest {
@Test
public void testSelectWithIllegalArgumentException() {
RoundRobinSelector selector = new RoundRobinSelector();
Assertions.assertThrows(IllegalArgumentException.class, () -> {
selector.select(null);
});
}
@Test
public void testSelect1() {
RoundRobinSelector selector = new RoundRobinSelector();
List<HostWorker> hostOneList = Arrays.asList(
new HostWorker("192.168.1.1", 80, 20, "kris"),
new HostWorker("192.168.1.2", 80, 10, "kris"));
List<HostWorker> hostTwoList = Arrays.asList(
new HostWorker("192.168.1.1", 80, 20, "kris"),
new HostWorker("192.168.1.2", 80, 10, "kris"),
new HostWorker("192.168.1.3", 80, 10, "kris"));
HostWorker result;
result = selector.select(hostOneList);
Assertions.assertEquals("192.168.1.1", result.getIp());
result = selector.select(hostOneList);
Assertions.assertEquals("192.168.1.2", result.getIp());
result = selector.select(hostOneList);
Assertions.assertEquals("192.168.1.1", result.getIp());
result = selector.select(hostOneList);
Assertions.assertEquals("192.168.1.1", result.getIp());
result = selector.select(hostOneList);
Assertions.assertEquals("192.168.1.2", result.getIp());
// add new host
result = selector.select(hostTwoList);
Assertions.assertEquals("192.168.1.1", result.getIp());
result = selector.select(hostTwoList);
Assertions.assertEquals("192.168.1.3", result.getIp());
result = selector.select(hostTwoList);
Assertions.assertEquals("192.168.1.1", result.getIp());
result = selector.select(hostTwoList);
Assertions.assertEquals("192.168.1.2", result.getIp());
result = selector.select(hostTwoList);
Assertions.assertEquals("192.168.1.1", result.getIp());
result = selector.select(hostTwoList);
Assertions.assertEquals("192.168.1.3", result.getIp());
// remove host3
result = selector.select(hostOneList);
Assertions.assertEquals("192.168.1.1", result.getIp());
result = selector.select(hostOneList);
Assertions.assertEquals("192.168.1.2", result.getIp());
result = selector.select(hostOneList);
Assertions.assertEquals("192.168.1.1", result.getIp());
}
@Test
public void testWeightRoundRobinSelector() {
RoundRobinSelector selector = new RoundRobinSelector();
HostWorker result;
result = selector.select(
Arrays.asList(new HostWorker("192.168.1.1", 11, 20, "kris"),
new HostWorker("192.168.1.2", 22, 80, "kris")));
Assertions.assertEquals("192.168.1.2", result.getIp());
}
}

94
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/registry/MasterSlotManagerTest.java

@ -1,94 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.registry;
import org.apache.dolphinscheduler.common.enums.ServerStatus;
import org.apache.dolphinscheduler.common.model.MasterHeartBeat;
import org.apache.dolphinscheduler.server.master.config.MasterConfig;
import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension;
@ExtendWith(MockitoExtension.class)
public class MasterSlotManagerTest {
@InjectMocks
private MasterSlotManager masterSlotManager = Mockito.spy(new MasterSlotManager());
@Mock
private MasterConfig masterConfig;
@Test
void testNormalMasterSlots() {
// on normal Master side
Mockito.when(masterConfig.getMasterAddress()).thenReturn("127.0.0.1:7777");
sendHeartBeat(ServerStatus.BUSY, ServerStatus.NORMAL);
Assertions.assertEquals(1, masterSlotManager.getMasterSize());
Assertions.assertEquals(0, masterSlotManager.getSlot());
sendHeartBeat(ServerStatus.NORMAL, ServerStatus.NORMAL);
Assertions.assertEquals(2, masterSlotManager.getMasterSize());
Assertions.assertEquals(1, masterSlotManager.getSlot());
}
@Test
void testOverloadMasterSlots() {
// on abnormal Master side
Mockito.when(masterConfig.getMasterAddress()).thenReturn("127.0.0.1:6666");
sendHeartBeat(ServerStatus.BUSY, ServerStatus.NORMAL);
Assertions.assertEquals(0, masterSlotManager.getMasterSize());
Assertions.assertEquals(0, masterSlotManager.getSlot());
sendHeartBeat(ServerStatus.NORMAL, ServerStatus.NORMAL);
Assertions.assertEquals(2, masterSlotManager.getMasterSize());
Assertions.assertEquals(0, masterSlotManager.getSlot());
}
public void sendHeartBeat(ServerStatus serverStatus1, ServerStatus serverStatus2) {
MasterSlotManager.SlotChangeListener slotChangeListener = masterSlotManager.new SlotChangeListener();
Map<String, MasterHeartBeat> masterNodeInfo = new HashMap<>();
// generate heartbeat
MasterHeartBeat masterHeartBeat1 = MasterHeartBeat.builder()
.startupTime(System.currentTimeMillis())
.serverStatus(serverStatus1)
.host("127.0.0.1")
.port(6666)
.build();
MasterHeartBeat masterHeartBeat2 = MasterHeartBeat.builder()
.startupTime(System.currentTimeMillis())
.serverStatus(serverStatus2)
.host("127.0.0.1")
.port(7777)
.build();
masterNodeInfo.put("127.0.0.1:6666", masterHeartBeat1);
masterNodeInfo.put("127.0.0.1:7777", masterHeartBeat2);
slotChangeListener.notify(masterNodeInfo);
}
}

93
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/registry/ServerNodeManagerTest.java

@ -1,93 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.registry;
import org.apache.dolphinscheduler.common.model.WorkerHeartBeat;
import org.apache.dolphinscheduler.common.utils.JSONUtils;
import org.apache.dolphinscheduler.dao.AlertDao;
import org.apache.dolphinscheduler.registry.api.Event;
import org.apache.dolphinscheduler.registry.api.RegistryClient;
import org.apache.dolphinscheduler.service.alert.ListenerEventAlertManager;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.MockitoAnnotations;
public class ServerNodeManagerTest {
@Mock
RegistryClient registryClient;
@Mock
AlertDao alertDao;
@Mock
ListenerEventAlertManager listenerEventAlertManager;
@InjectMocks
ServerNodeManager serverNodeManager;
@Test
public void updateWorkerNodesTest() throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
MockitoAnnotations.initMocks(this);
HashMap<String, String> workerNodeMaps = new HashMap<>();
workerNodeMaps.put("worker-node-1", JSONUtils.toJsonString(new WorkerHeartBeat()));
workerNodeMaps.put("worker-node-2", JSONUtils.toJsonString(new WorkerHeartBeat()));
Mockito.when(registryClient.getServerMaps(Mockito.any())).thenReturn(workerNodeMaps);
Mockito.when(registryClient.isWorkerPath(Mockito.anyString())).thenReturn(true);
// two worker server running (worker-node-1, worker-node-2)
Method updateWorkerNodes = serverNodeManager.getClass().getDeclaredMethod("updateWorkerNodes");
updateWorkerNodes.setAccessible(true);
updateWorkerNodes.invoke(serverNodeManager);
Map<String, WorkerHeartBeat> workerNodeInfo = serverNodeManager.getWorkerNodeInfo();
Assertions.assertTrue(workerNodeInfo.containsKey("worker-node-1"));
Assertions.assertTrue(workerNodeInfo.containsKey("worker-node-2"));
// receive remove event when worker-node-1 server stop
ServerNodeManager.WorkerDataListener workerDataListener = serverNodeManager.new WorkerDataListener();
Event event = new Event("", "/nodes/worker/worker-node-1", "", Event.Type.REMOVE);
workerDataListener.notify(event);
// check worker-node-1 not exist in cache
workerNodeInfo = serverNodeManager.getWorkerNodeInfo();
Assertions.assertFalse(workerNodeInfo.containsKey("worker-node-1"));
Assertions.assertTrue(workerNodeInfo.containsKey("worker-node-2"));
// worker-node-1 restart, getServerMaps(RegistryNodeType.WORKER) method return two worker
updateWorkerNodes.invoke(serverNodeManager);
// check cache
workerNodeInfo = serverNodeManager.getWorkerNodeInfo();
Assertions.assertTrue(workerNodeInfo.containsKey("worker-node-1"));
Assertions.assertTrue(workerNodeInfo.containsKey("worker-node-2"));
}
}

3
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/runner/dispatcher/MasterTaskDispatcherTest.java

@ -38,8 +38,7 @@ public class MasterTaskDispatcherTest {
Mockito.when(masterConfig.getMasterAddress()).thenReturn("localhost:5678");
TaskExecuteRunnable taskExecuteRunnable = Mockito.mock(TaskExecuteRunnable.class);
MasterTaskDispatcher masterTaskDispatcher =
new MasterTaskDispatcher(taskEventService, masterConfig);
MasterTaskDispatcher masterTaskDispatcher = new MasterTaskDispatcher(masterConfig);
Host taskInstanceDispatchHost = masterTaskDispatcher.getTaskInstanceDispatchHost(taskExecuteRunnable)
.orElseThrow(() -> new IllegalArgumentException("Cannot get the "));
Assertions.assertEquals(masterConfig.getMasterAddress(), taskInstanceDispatchHost.getAddress());

16
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/runner/dispatcher/WorkerTaskDispatcherTest.java

@ -19,10 +19,7 @@ package org.apache.dolphinscheduler.server.master.runner.dispatcher;
import org.apache.dolphinscheduler.extract.base.utils.Host;
import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext;
import org.apache.dolphinscheduler.server.master.config.MasterConfig;
import org.apache.dolphinscheduler.server.master.dispatch.exceptions.WorkerGroupNotFoundException;
import org.apache.dolphinscheduler.server.master.dispatch.host.HostManager;
import org.apache.dolphinscheduler.server.master.processor.queue.TaskEventService;
import org.apache.dolphinscheduler.server.master.cluster.loadbalancer.IWorkerLoadBalancer;
import org.apache.dolphinscheduler.server.master.runner.TaskExecuteRunnable;
import java.util.Optional;
@ -37,13 +34,10 @@ import org.mockito.junit.jupiter.MockitoExtension;
public class WorkerTaskDispatcherTest {
@Test
public void getTaskInstanceDispatchHost() throws WorkerGroupNotFoundException {
TaskEventService taskEventService = Mockito.mock(TaskEventService.class);
MasterConfig masterConfig = Mockito.mock(MasterConfig.class);
HostManager hostManager = Mockito.mock(HostManager.class);
Mockito.when(hostManager.select(Mockito.any())).thenReturn(Optional.of(Host.of("localhost:1234")));
WorkerTaskDispatcher workerTaskDispatcher =
new WorkerTaskDispatcher(taskEventService, masterConfig, hostManager);
public void getTaskInstanceDispatchHost() {
IWorkerLoadBalancer workerLoadBalancer = Mockito.mock(IWorkerLoadBalancer.class);
Mockito.when(workerLoadBalancer.select(Mockito.any())).thenReturn(Optional.of("localhost:1234"));
WorkerTaskDispatcher workerTaskDispatcher = new WorkerTaskDispatcher(workerLoadBalancer);
TaskExecuteRunnable taskExecuteRunnable = Mockito.mock(TaskExecuteRunnable.class);
Mockito.when(taskExecuteRunnable.getTaskExecutionContext()).thenReturn(new TaskExecutionContext());

59
dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/utils/MasterThreadFactoryTest.java

@ -0,0 +1,59 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.utils;
import java.time.Duration;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.atomic.AtomicBoolean;
import org.awaitility.Awaitility;
import org.junit.jupiter.api.Test;
import com.google.common.truth.Truth;
class MasterThreadFactoryTest {
@Test
void getDefaultSchedulerThreadExecutor() {
ScheduledExecutorService defaultSchedulerThreadExecutor =
MasterThreadFactory.getDefaultSchedulerThreadExecutor();
Truth.assertThat(defaultSchedulerThreadExecutor).isNotNull();
AtomicBoolean taskOneFlag = new AtomicBoolean(false);
defaultSchedulerThreadExecutor.scheduleWithFixedDelay(() -> {
taskOneFlag.set(true);
}, 0,
1,
java.util.concurrent.TimeUnit.SECONDS);
AtomicBoolean taskTwoFlag = new AtomicBoolean(false);
defaultSchedulerThreadExecutor.scheduleWithFixedDelay(() -> {
taskTwoFlag.set(true);
}, 0,
1,
java.util.concurrent.TimeUnit.SECONDS);
Awaitility.await()
.atMost(Duration.ofSeconds(5))
.untilAsserted(() -> {
Truth.assertThat(taskOneFlag.get()).isTrue();
Truth.assertThat(taskTwoFlag.get()).isTrue();
});
}
}

10
dolphinscheduler-master/src/test/resources/application.yaml

@ -95,8 +95,6 @@ master:
exec-threads: 100
# master dispatch task number per batch, if all the tasks dispatch failed in a batch, will sleep 1s.
dispatch-task-number: 30
# master host selector to select a suitable worker, default value: LowerWeight. Optional values include random, round_robin, lower_weight
host-selector: lower_weight
# master heartbeat interval
max-heartbeat-interval: 10s
# master commit task retry times
@ -115,6 +113,14 @@ master:
max-system-memory-usage-percentage-thresholds: 0.77
# Master max disk usage , when the master's disk usage is smaller then this value, master server can execute workflow.
max-disk-usage-percentage-thresholds: 0.77
worker-load-balancer-configuration-properties:
# RANDOM, ROUND_ROBIN, FIXED_WEIGHTED_ROUND_ROBIN, DYNAMIC_WEIGHTED_ROUND_ROBIN
type: DYNAMIC_WEIGHTED_ROUND_ROBIN
# dynamic-weight-config-properties only used in DYNAMIC_WEIGHTED_ROUND_ROBIN, the weight of memory-usage, cpu-usage, task-thread-pool-usage should sum to 100.
dynamic-weight-config-properties:
memory-usage-weight: 40
cpu-usage-weight: 30
task-thread-pool-usage-weight: 30
# failover interval, the unit is minute
failover-interval: 10m
# kill yarn / k8s application when failover taskInstance, default true

12
dolphinscheduler-registry/dolphinscheduler-registry-api/src/main/java/org/apache/dolphinscheduler/registry/api/RegistryClient.java

@ -59,9 +59,15 @@ public class RegistryClient {
public RegistryClient(Registry registry) {
this.registry = registry;
registry.put(RegistryNodeType.MASTER.getRegistryPath(), EMPTY, false);
registry.put(RegistryNodeType.WORKER.getRegistryPath(), EMPTY, false);
registry.put(RegistryNodeType.ALERT_SERVER.getRegistryPath(), EMPTY, false);
if (!registry.exists(RegistryNodeType.MASTER.getRegistryPath())) {
registry.put(RegistryNodeType.MASTER.getRegistryPath(), EMPTY, false);
}
if (!registry.exists(RegistryNodeType.WORKER.getRegistryPath())) {
registry.put(RegistryNodeType.WORKER.getRegistryPath(), EMPTY, false);
}
if (!registry.exists(RegistryNodeType.ALERT_SERVER.getRegistryPath())) {
registry.put(RegistryNodeType.ALERT_SERVER.getRegistryPath(), EMPTY, false);
}
}
public boolean isConnected() {

120
dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/queue/MasterPriorityQueue.java

@ -1,120 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.service.queue;
import org.apache.dolphinscheduler.common.model.Server;
import org.apache.dolphinscheduler.common.utils.NetUtils;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.TimeUnit;
public class MasterPriorityQueue implements TaskPriorityQueue<Server> {
/**
* queue size
*/
private static final Integer QUEUE_MAX_SIZE = 20;
/**
* queue
*/
private PriorityBlockingQueue<Server> queue = new PriorityBlockingQueue<>(QUEUE_MAX_SIZE, new ServerComparator());
private HashMap<String, Integer> hostIndexMap = new HashMap<>();
@Override
public void put(Server serverInfo) {
this.queue.put(serverInfo);
refreshMasterList();
}
@Override
public Server take() throws InterruptedException {
return queue.take();
}
@Override
public Server poll(long timeout, TimeUnit unit) {
return queue.poll();
}
@Override
public int size() {
return queue.size();
}
public void putAll(Collection<Server> serverList) {
for (Server server : serverList) {
this.queue.put(server);
}
refreshMasterList();
}
public void remove(Server server) {
this.queue.remove(server);
}
public void clear() {
queue.clear();
refreshMasterList();
}
private void refreshMasterList() {
hostIndexMap.clear();
int index = 0;
for (Server server : getOrderedElements()) {
String addr = NetUtils.getAddr(server.getHost(), server.getPort());
hostIndexMap.put(addr, index);
index += 1;
}
}
/**
* get ordered collection of priority queue
*
* @return ordered collection
*/
Server[] getOrderedElements() {
Server[] nQueue = queue.toArray(new Server[0]);
Arrays.sort(nQueue, new ServerComparator());
return nQueue;
}
public int getIndex(String addr) {
if (!hostIndexMap.containsKey(addr)) {
return -1;
}
return hostIndexMap.get(addr);
}
/**
* server comparator, used to sort server by createTime in reverse order.
*/
private class ServerComparator implements Comparator<Server> {
@Override
public int compare(Server o1, Server o2) {
return o2.getCreateTime().compareTo(o1.getCreateTime());
}
}
}

101
dolphinscheduler-service/src/test/java/org/apache/dolphinscheduler/service/queue/MasterPriorityQueueTest.java

@ -1,101 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.service.queue;
import org.apache.dolphinscheduler.common.model.Server;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.stream.Collectors;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
public class MasterPriorityQueueTest {
@Test
public void getOrderedCollection() {
MasterPriorityQueue queue = new MasterPriorityQueue();
// Test empty queue
Server[] emptyElements = queue.getOrderedElements();
Assertions.assertArrayEquals(emptyElements, new Server[]{});
// Test queue with fabricated servers
queue.putAll(getServerList());
Server[] orderElements = queue.getOrderedElements();
Assertions.assertEquals(extractServerIds(orderElements), Arrays.asList(4, 2, 1, 3));
}
@Test
public void refreshMasterList() {
MasterPriorityQueue queue = new MasterPriorityQueue();
// Test empty queue
queue.clear();
Assertions.assertEquals(queue.getIndex("127.0.0.1:124"), -1);
// Test queue with fabricated servers
queue.putAll(getServerList());
Assertions.assertEquals(queue.getIndex("127.0.0.1:124"), 0);
Assertions.assertEquals(queue.getIndex("127.0.0.1:122"), 1);
Assertions.assertEquals(queue.getIndex("127.0.0.1:121"), 2);
Assertions.assertEquals(queue.getIndex("127.0.0.1:123"), 3);
}
private List<Server> getServerList() {
long baseTime = new Date().getTime();
Server server1 = new Server();
server1.setId(1);
server1.setHost("127.0.0.1");
server1.setPort(121);
server1.setCreateTime(new Date(baseTime - 1000));
Server server2 = new Server();
server2.setId(2);
server2.setHost("127.0.0.1");
server2.setPort(122);
server2.setCreateTime(new Date(baseTime + 1000));
Server server3 = new Server();
server3.setId(3);
server3.setHost("127.0.0.1");
server3.setPort(123);
server3.setCreateTime(new Date(baseTime - 2000));
Server server4 = new Server();
server4.setId(4);
server4.setHost("127.0.0.1");
server4.setPort(124);
server4.setCreateTime(new Date(baseTime + 2000));
return Arrays.asList(server1, server2, server3, server4);
}
private List<Integer> extractServerIds(Server[] servers) {
return Arrays.stream(servers).map(Server::getId).collect(Collectors.toList());
}
}

12
dolphinscheduler-standalone-server/src/main/resources/application.yaml

@ -165,10 +165,6 @@ master:
pre-exec-threads: 10
# master execute thread number to limit process instances in parallel
exec-threads: 10
# master dispatch task number per batch
dispatch-task-number: 3
# master host selector to select a suitable worker, default value: LowerWeight. Optional values include random, round_robin, lower_weight
host-selector: lower_weight
# master heartbeat interval
max-heartbeat-interval: 10s
# master commit task retry times
@ -186,6 +182,14 @@ master:
max-system-memory-usage-percentage-thresholds: 0.9
# Master max disk usage , when the master's disk usage is smaller then this value, master server can execute workflow.
max-disk-usage-percentage-thresholds: 0.9
worker-load-balancer-configuration-properties:
# RANDOM, ROUND_ROBIN, FIXED_WEIGHTED_ROUND_ROBIN, DYNAMIC_WEIGHTED_ROUND_ROBIN
type: DYNAMIC_WEIGHTED_ROUND_ROBIN
# dynamic-weight-config-properties only used in DYNAMIC_WEIGHTED_ROUND_ROBIN, the weight of memory-usage, cpu-usage, task-thread-pool-usage should sum to 100.
dynamic-weight-config-properties:
memory-usage-weight: 30
cpu-usage-weight: 30
task-thread-pool-usage-weight: 40
# failover interval
failover-interval: 10m
# kill yarn/k8s application when failover taskInstance, default true

4
dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/WorkerTaskExecutorThreadPool.java

@ -80,6 +80,10 @@ public class WorkerTaskExecutorThreadPool {
return Math.min(WorkerTaskExecutorHolder.size(), workerConfig.getExecThreads());
}
public double getThreadPoolUsage() {
return (double) WorkerTaskExecutorHolder.size() / workerConfig.getExecThreads();
}
/**
* Kill tasks that have not been executed, e.g. waiting in the queue
*/

3
dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/task/WorkerHeartBeatTask.java

@ -73,8 +73,7 @@ public class WorkerHeartBeatTask extends BaseHeartBeatTask<WorkerHeartBeat> {
.diskUsage(systemMetrics.getDiskUsedPercentage())
.processId(processId)
.workerHostWeight(workerConfig.getHostWeight())
.threadPoolUsage(workerTaskExecutorThreadPool.getRunningTaskExecutorSize()
+ workerTaskExecutorThreadPool.getWaitingTaskExecutorSize())
.threadPoolUsage(workerTaskExecutorThreadPool.getThreadPoolUsage())
.serverStatus(serverStatus)
.host(NetUtils.getHost())
.port(workerConfig.getListenPort())

Loading…
Cancel
Save