|
|
@ -20,11 +20,11 @@ package org.apache.dolphinscheduler.server.master.dispatch.host; |
|
|
|
import org.apache.dolphinscheduler.common.Constants; |
|
|
|
import org.apache.dolphinscheduler.common.Constants; |
|
|
|
import org.apache.dolphinscheduler.common.utils.HeartBeat; |
|
|
|
import org.apache.dolphinscheduler.common.utils.HeartBeat; |
|
|
|
import org.apache.dolphinscheduler.remote.utils.Host; |
|
|
|
import org.apache.dolphinscheduler.remote.utils.Host; |
|
|
|
import org.apache.dolphinscheduler.remote.utils.NamedThreadFactory; |
|
|
|
|
|
|
|
import org.apache.dolphinscheduler.server.master.dispatch.context.ExecutionContext; |
|
|
|
import org.apache.dolphinscheduler.server.master.dispatch.context.ExecutionContext; |
|
|
|
import org.apache.dolphinscheduler.server.master.dispatch.host.assign.HostWeight; |
|
|
|
import org.apache.dolphinscheduler.server.master.dispatch.host.assign.HostWeight; |
|
|
|
import org.apache.dolphinscheduler.server.master.dispatch.host.assign.HostWorker; |
|
|
|
import org.apache.dolphinscheduler.server.master.dispatch.host.assign.HostWorker; |
|
|
|
import org.apache.dolphinscheduler.server.master.dispatch.host.assign.LowerWeightRoundRobin; |
|
|
|
import org.apache.dolphinscheduler.server.master.dispatch.host.assign.LowerWeightRoundRobin; |
|
|
|
|
|
|
|
import org.apache.dolphinscheduler.server.master.registry.WorkerInfoChangeListener; |
|
|
|
import org.apache.dolphinscheduler.spi.utils.StringUtils; |
|
|
|
import org.apache.dolphinscheduler.spi.utils.StringUtils; |
|
|
|
|
|
|
|
|
|
|
|
import org.apache.commons.collections.CollectionUtils; |
|
|
|
import org.apache.commons.collections.CollectionUtils; |
|
|
@ -36,14 +36,10 @@ import java.util.Map; |
|
|
|
import java.util.Optional; |
|
|
|
import java.util.Optional; |
|
|
|
import java.util.Set; |
|
|
|
import java.util.Set; |
|
|
|
import java.util.concurrent.ConcurrentHashMap; |
|
|
|
import java.util.concurrent.ConcurrentHashMap; |
|
|
|
import java.util.concurrent.Executors; |
|
|
|
|
|
|
|
import java.util.concurrent.ScheduledExecutorService; |
|
|
|
|
|
|
|
import java.util.concurrent.TimeUnit; |
|
|
|
|
|
|
|
import java.util.concurrent.locks.Lock; |
|
|
|
import java.util.concurrent.locks.Lock; |
|
|
|
import java.util.concurrent.locks.ReentrantLock; |
|
|
|
import java.util.concurrent.locks.ReentrantLock; |
|
|
|
|
|
|
|
|
|
|
|
import javax.annotation.PostConstruct; |
|
|
|
import javax.annotation.PostConstruct; |
|
|
|
import javax.annotation.PreDestroy; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import org.slf4j.Logger; |
|
|
|
import org.slf4j.Logger; |
|
|
|
import org.slf4j.LoggerFactory; |
|
|
|
import org.slf4j.LoggerFactory; |
|
|
@ -70,23 +66,12 @@ public class LowerWeightHostManager extends CommonHostManager { |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
private Lock lock; |
|
|
|
private Lock lock; |
|
|
|
|
|
|
|
|
|
|
|
/** |
|
|
|
|
|
|
|
* executor service |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
private ScheduledExecutorService executorService; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@PostConstruct |
|
|
|
@PostConstruct |
|
|
|
public void init() { |
|
|
|
public void init() { |
|
|
|
this.selector = new LowerWeightRoundRobin(); |
|
|
|
this.selector = new LowerWeightRoundRobin(); |
|
|
|
this.workerHostWeightsMap = new ConcurrentHashMap<>(); |
|
|
|
this.workerHostWeightsMap = new ConcurrentHashMap<>(); |
|
|
|
this.lock = new ReentrantLock(); |
|
|
|
this.lock = new ReentrantLock(); |
|
|
|
this.executorService = Executors.newSingleThreadScheduledExecutor(new NamedThreadFactory("LowerWeightHostManagerExecutor")); |
|
|
|
serverNodeManager.addWorkerInfoChangeListener(new WorkerWeightListener()); |
|
|
|
this.executorService.scheduleWithFixedDelay(new RefreshResourceTask(), 0, 1, TimeUnit.SECONDS); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@PreDestroy |
|
|
|
|
|
|
|
public void close() { |
|
|
|
|
|
|
|
this.executorService.shutdownNow(); |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/** |
|
|
|
/** |
|
|
@ -109,6 +94,69 @@ public class LowerWeightHostManager extends CommonHostManager { |
|
|
|
throw new UnsupportedOperationException("not support"); |
|
|
|
throw new UnsupportedOperationException("not support"); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private class WorkerWeightListener implements WorkerInfoChangeListener { |
|
|
|
|
|
|
|
@Override |
|
|
|
|
|
|
|
public void notify(Map<String, Set<String>> workerGroups, Map<String, String> workerNodeInfo) { |
|
|
|
|
|
|
|
syncWorkerResources(workerGroups, workerNodeInfo); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** |
|
|
|
|
|
|
|
* Sync worker resource. |
|
|
|
|
|
|
|
* |
|
|
|
|
|
|
|
* @param workerGroupNodes worker group nodes, key is worker group, value is worker group nodes. |
|
|
|
|
|
|
|
* @param workerNodeInfoMap worker node info map, key is worker node, value is worker info. |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
private void syncWorkerResources(final Map<String, Set<String>> workerGroupNodes, |
|
|
|
|
|
|
|
final Map<String, String> workerNodeInfoMap) { |
|
|
|
|
|
|
|
try { |
|
|
|
|
|
|
|
Map<String, Set<HostWeight>> workerHostWeights = new HashMap<>(); |
|
|
|
|
|
|
|
for (Map.Entry<String, Set<String>> entry : workerGroupNodes.entrySet()) { |
|
|
|
|
|
|
|
String workerGroup = entry.getKey(); |
|
|
|
|
|
|
|
Set<String> nodes = entry.getValue(); |
|
|
|
|
|
|
|
Set<HostWeight> hostWeights = new HashSet<>(nodes.size()); |
|
|
|
|
|
|
|
for (String node : nodes) { |
|
|
|
|
|
|
|
String heartbeat = workerNodeInfoMap.getOrDefault(node, null); |
|
|
|
|
|
|
|
Optional<HostWeight> hostWeightOpt = getHostWeight(node, workerGroup, heartbeat); |
|
|
|
|
|
|
|
hostWeightOpt.ifPresent(hostWeights::add); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (!hostWeights.isEmpty()) { |
|
|
|
|
|
|
|
workerHostWeights.put(workerGroup, hostWeights); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
syncWorkerHostWeight(workerHostWeights); |
|
|
|
|
|
|
|
} catch (Throwable ex) { |
|
|
|
|
|
|
|
logger.error("Sync worker resource error", ex); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private Optional<HostWeight> getHostWeight(String addr, String workerGroup, String heartBeatInfo) { |
|
|
|
|
|
|
|
if (StringUtils.isEmpty(heartBeatInfo)) { |
|
|
|
|
|
|
|
logger.warn("worker {} in work group {} have not received the heartbeat", addr, workerGroup); |
|
|
|
|
|
|
|
return Optional.empty(); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
HeartBeat heartBeat = HeartBeat.decodeHeartBeat(heartBeatInfo); |
|
|
|
|
|
|
|
if (heartBeat == null) { |
|
|
|
|
|
|
|
return Optional.empty(); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (Constants.ABNORMAL_NODE_STATUS == heartBeat.getServerStatus()) { |
|
|
|
|
|
|
|
logger.warn("worker {} current cpu load average {} is too high or available memory {}G is too low", |
|
|
|
|
|
|
|
addr, heartBeat.getLoadAverage(), heartBeat.getAvailablePhysicalMemorySize()); |
|
|
|
|
|
|
|
return Optional.empty(); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (Constants.BUSY_NODE_STATUE == heartBeat.getServerStatus()) { |
|
|
|
|
|
|
|
logger.warn("worker {} is busy, current waiting task count {} is large than worker thread count {}", |
|
|
|
|
|
|
|
addr, heartBeat.getWorkerWaitingTaskCount(), heartBeat.getWorkerExecThreadCount()); |
|
|
|
|
|
|
|
return Optional.empty(); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
return Optional.of( |
|
|
|
|
|
|
|
new HostWeight(HostWorker.of(addr, heartBeat.getWorkerHostWeight(), workerGroup), |
|
|
|
|
|
|
|
heartBeat.getCpuUsage(), heartBeat.getMemoryUsage(), heartBeat.getLoadAverage(), |
|
|
|
|
|
|
|
heartBeat.getWorkerWaitingTaskCount(), heartBeat.getStartupTime())); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private void syncWorkerHostWeight(Map<String, Set<HostWeight>> workerHostWeights) { |
|
|
|
private void syncWorkerHostWeight(Map<String, Set<HostWeight>> workerHostWeights) { |
|
|
|
lock.lock(); |
|
|
|
lock.lock(); |
|
|
|
try { |
|
|
|
try { |
|
|
@ -128,58 +176,4 @@ public class LowerWeightHostManager extends CommonHostManager { |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
class RefreshResourceTask implements Runnable { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@Override |
|
|
|
|
|
|
|
public void run() { |
|
|
|
|
|
|
|
try { |
|
|
|
|
|
|
|
Map<String, Set<HostWeight>> workerHostWeights = new HashMap<>(); |
|
|
|
|
|
|
|
Map<String, Set<String>> workerGroupNodes = serverNodeManager.getWorkerGroupNodes(); |
|
|
|
|
|
|
|
for (Map.Entry<String, Set<String>> entry : workerGroupNodes.entrySet()) { |
|
|
|
|
|
|
|
String workerGroup = entry.getKey(); |
|
|
|
|
|
|
|
Set<String> nodes = entry.getValue(); |
|
|
|
|
|
|
|
Set<HostWeight> hostWeights = new HashSet<>(nodes.size()); |
|
|
|
|
|
|
|
for (String node : nodes) { |
|
|
|
|
|
|
|
String heartbeat = serverNodeManager.getWorkerNodeInfo(node); |
|
|
|
|
|
|
|
Optional<HostWeight> hostWeightOpt = getHostWeight(node, workerGroup, heartbeat); |
|
|
|
|
|
|
|
if (hostWeightOpt.isPresent()) { |
|
|
|
|
|
|
|
hostWeights.add(hostWeightOpt.get()); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (!hostWeights.isEmpty()) { |
|
|
|
|
|
|
|
workerHostWeights.put(workerGroup, hostWeights); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
syncWorkerHostWeight(workerHostWeights); |
|
|
|
|
|
|
|
} catch (Throwable ex) { |
|
|
|
|
|
|
|
logger.error("RefreshResourceTask error", ex); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public Optional<HostWeight> getHostWeight(String addr, String workerGroup, String heartBeatInfo) { |
|
|
|
|
|
|
|
if (StringUtils.isEmpty(heartBeatInfo)) { |
|
|
|
|
|
|
|
logger.warn("worker {} in work group {} have not received the heartbeat", addr, workerGroup); |
|
|
|
|
|
|
|
return Optional.empty(); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
HeartBeat heartBeat = HeartBeat.decodeHeartBeat(heartBeatInfo); |
|
|
|
|
|
|
|
if (heartBeat == null) { |
|
|
|
|
|
|
|
return Optional.empty(); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (Constants.ABNORMAL_NODE_STATUS == heartBeat.getServerStatus()) { |
|
|
|
|
|
|
|
logger.warn("worker {} current cpu load average {} is too high or available memory {}G is too low", |
|
|
|
|
|
|
|
addr, heartBeat.getLoadAverage(), heartBeat.getAvailablePhysicalMemorySize()); |
|
|
|
|
|
|
|
return Optional.empty(); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (Constants.BUSY_NODE_STATUE == heartBeat.getServerStatus()) { |
|
|
|
|
|
|
|
logger.warn("worker {} is busy, current waiting task count {} is large than worker thread count {}", |
|
|
|
|
|
|
|
addr, heartBeat.getWorkerWaitingTaskCount(), heartBeat.getWorkerExecThreadCount()); |
|
|
|
|
|
|
|
return Optional.empty(); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
return Optional.of( |
|
|
|
|
|
|
|
new HostWeight(HostWorker.of(addr, heartBeat.getWorkerHostWeight(), workerGroup), |
|
|
|
|
|
|
|
heartBeat.getCpuUsage(), heartBeat.getMemoryUsage(), heartBeat.getLoadAverage(), |
|
|
|
|
|
|
|
heartBeat.getWorkerWaitingTaskCount(), heartBeat.getStartupTime())); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|