Wenjun Ruan
2 years ago
committed by
GitHub
25 changed files with 583 additions and 886 deletions
@ -0,0 +1,81 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package org.apache.dolphinscheduler.common.model; |
||||
|
||||
import lombok.extern.slf4j.Slf4j; |
||||
import org.apache.dolphinscheduler.common.lifecycle.ServerLifeCycleManager; |
||||
import org.apache.dolphinscheduler.common.thread.BaseDaemonThread; |
||||
|
||||
@Slf4j |
||||
public abstract class BaseHeartBeatTask<T> extends BaseDaemonThread { |
||||
|
||||
private final String threadName; |
||||
private final long heartBeatInterval; |
||||
|
||||
protected boolean runningFlag; |
||||
|
||||
public BaseHeartBeatTask(String threadName, long heartBeatInterval) { |
||||
super(threadName); |
||||
this.threadName = threadName; |
||||
this.heartBeatInterval = heartBeatInterval; |
||||
this.runningFlag = true; |
||||
} |
||||
|
||||
@Override |
||||
public synchronized void start() { |
||||
log.info("Starting {}", threadName); |
||||
super.start(); |
||||
log.info("Started {}, heartBeatInterval: {}", threadName, heartBeatInterval); |
||||
} |
||||
|
||||
@Override |
||||
public void run() { |
||||
while (runningFlag) { |
||||
try { |
||||
if (!ServerLifeCycleManager.isRunning()) { |
||||
log.info("The current server status is {}, will not write heartBeatInfo into registry", ServerLifeCycleManager.getServerStatus()); |
||||
continue; |
||||
} |
||||
T heartBeat = getHeartBeat(); |
||||
writeHeartBeat(heartBeat); |
||||
} catch (Exception ex) { |
||||
log.error("{} task execute failed", threadName, ex); |
||||
} finally { |
||||
try { |
||||
Thread.sleep(heartBeatInterval); |
||||
} catch (InterruptedException e) { |
||||
handleInterruptException(e); |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
public void shutdown() { |
||||
log.warn("{} task finished", threadName); |
||||
runningFlag = false; |
||||
} |
||||
|
||||
private void handleInterruptException(InterruptedException ex) { |
||||
log.warn("{} has been interrupted", threadName, ex); |
||||
Thread.currentThread().interrupt(); |
||||
} |
||||
|
||||
public abstract T getHeartBeat(); |
||||
|
||||
public abstract void writeHeartBeat(T heartBeat); |
||||
} |
@ -0,0 +1,21 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package org.apache.dolphinscheduler.common.model; |
||||
|
||||
/**
 * Marker interface for heartbeat payloads. It declares no members.
 */
public interface HeartBeat {
}
@ -0,0 +1,39 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package org.apache.dolphinscheduler.common.model; |
||||
|
||||
import lombok.AllArgsConstructor; |
||||
import lombok.Builder; |
||||
import lombok.Data; |
||||
import lombok.NoArgsConstructor; |
||||
|
||||
@Data |
||||
@Builder |
||||
@NoArgsConstructor |
||||
@AllArgsConstructor |
||||
public class MasterHeartBeat implements HeartBeat { |
||||
private long startupTime; |
||||
private long reportTime; |
||||
private double cpuUsage; |
||||
private double memoryUsage; |
||||
private double loadAverage; |
||||
private double availablePhysicalMemorySize; |
||||
private double maxCpuloadAvg; |
||||
private double reservedMemory; |
||||
private int processId; |
||||
} |
@ -0,0 +1,47 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package org.apache.dolphinscheduler.common.model; |
||||
|
||||
import lombok.AllArgsConstructor; |
||||
import lombok.Builder; |
||||
import lombok.Data; |
||||
import lombok.NoArgsConstructor; |
||||
|
||||
@Data |
||||
@Builder |
||||
@NoArgsConstructor |
||||
@AllArgsConstructor |
||||
public class WorkerHeartBeat implements HeartBeat { |
||||
|
||||
private long startupTime; |
||||
private long reportTime; |
||||
private double cpuUsage; |
||||
private double memoryUsage; |
||||
private double loadAverage; |
||||
private double availablePhysicalMemorySize; |
||||
private double maxCpuloadAvg; |
||||
private double reservedMemory; |
||||
private int serverStatus; |
||||
private int processId; |
||||
|
||||
private int workerHostWeight; // worker host weight
|
||||
private int workerWaitingTaskCount; // worker waiting task count
|
||||
private int workerExecThreadCount; // worker thread pool thread count
|
||||
|
||||
|
||||
} |
@ -1,261 +0,0 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package org.apache.dolphinscheduler.common.utils; |
||||
|
||||
import org.apache.dolphinscheduler.common.Constants; |
||||
|
||||
import org.slf4j.Logger; |
||||
import org.slf4j.LoggerFactory; |
||||
|
||||
public class HeartBeat { |
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(HeartBeat.class); |
||||
|
||||
private long startupTime; |
||||
private long reportTime; |
||||
private double cpuUsage; |
||||
private double memoryUsage; |
||||
private double loadAverage; |
||||
private double availablePhysicalMemorySize; |
||||
private double maxCpuloadAvg; |
||||
private double reservedMemory; |
||||
private int serverStatus; |
||||
private int processId; |
||||
|
||||
private int workerHostWeight; // worker host weight
|
||||
private int workerWaitingTaskCount; // worker waiting task count
|
||||
private int workerExecThreadCount; // worker thread pool thread count
|
||||
|
||||
private double diskAvailable; |
||||
|
||||
public double getDiskAvailable() { |
||||
return diskAvailable; |
||||
} |
||||
|
||||
public void setDiskAvailable(double diskAvailable) { |
||||
this.diskAvailable = diskAvailable; |
||||
} |
||||
|
||||
public long getStartupTime() { |
||||
return startupTime; |
||||
} |
||||
|
||||
public void setStartupTime(long startupTime) { |
||||
this.startupTime = startupTime; |
||||
} |
||||
|
||||
public long getReportTime() { |
||||
return reportTime; |
||||
} |
||||
|
||||
public void setReportTime(long reportTime) { |
||||
this.reportTime = reportTime; |
||||
} |
||||
|
||||
public double getCpuUsage() { |
||||
return cpuUsage; |
||||
} |
||||
|
||||
public void setCpuUsage(double cpuUsage) { |
||||
this.cpuUsage = cpuUsage; |
||||
} |
||||
|
||||
public double getMemoryUsage() { |
||||
return memoryUsage; |
||||
} |
||||
|
||||
public void setMemoryUsage(double memoryUsage) { |
||||
this.memoryUsage = memoryUsage; |
||||
} |
||||
|
||||
public double getLoadAverage() { |
||||
return loadAverage; |
||||
} |
||||
|
||||
public void setLoadAverage(double loadAverage) { |
||||
this.loadAverage = loadAverage; |
||||
} |
||||
|
||||
public double getAvailablePhysicalMemorySize() { |
||||
return availablePhysicalMemorySize; |
||||
} |
||||
|
||||
public void setAvailablePhysicalMemorySize(double availablePhysicalMemorySize) { |
||||
this.availablePhysicalMemorySize = availablePhysicalMemorySize; |
||||
} |
||||
|
||||
public double getMaxCpuloadAvg() { |
||||
return maxCpuloadAvg; |
||||
} |
||||
|
||||
public void setMaxCpuloadAvg(double maxCpuloadAvg) { |
||||
this.maxCpuloadAvg = maxCpuloadAvg; |
||||
} |
||||
|
||||
public double getReservedMemory() { |
||||
return reservedMemory; |
||||
} |
||||
|
||||
public void setReservedMemory(double reservedMemory) { |
||||
this.reservedMemory = reservedMemory; |
||||
} |
||||
|
||||
public int getServerStatus() { |
||||
return serverStatus; |
||||
} |
||||
|
||||
public void setServerStatus(int serverStatus) { |
||||
this.serverStatus = serverStatus; |
||||
} |
||||
|
||||
public int getProcessId() { |
||||
return processId; |
||||
} |
||||
|
||||
public void setProcessId(int processId) { |
||||
this.processId = processId; |
||||
} |
||||
|
||||
public int getWorkerHostWeight() { |
||||
return workerHostWeight; |
||||
} |
||||
|
||||
public void setWorkerHostWeight(int workerHostWeight) { |
||||
this.workerHostWeight = workerHostWeight; |
||||
} |
||||
|
||||
public int getWorkerWaitingTaskCount() { |
||||
return workerWaitingTaskCount; |
||||
} |
||||
|
||||
public void setWorkerWaitingTaskCount(int workerWaitingTaskCount) { |
||||
this.workerWaitingTaskCount = workerWaitingTaskCount; |
||||
} |
||||
|
||||
public int getWorkerExecThreadCount() { |
||||
return workerExecThreadCount; |
||||
} |
||||
|
||||
public void setWorkerExecThreadCount(int workerExecThreadCount) { |
||||
this.workerExecThreadCount = workerExecThreadCount; |
||||
} |
||||
|
||||
public HeartBeat() { |
||||
this.reportTime = System.currentTimeMillis(); |
||||
this.serverStatus = Constants.NORMAL_NODE_STATUS; |
||||
} |
||||
|
||||
public HeartBeat(long startupTime, double maxCpuloadAvg, double reservedMemory) { |
||||
this.reportTime = System.currentTimeMillis(); |
||||
this.serverStatus = Constants.NORMAL_NODE_STATUS; |
||||
this.startupTime = startupTime; |
||||
this.maxCpuloadAvg = maxCpuloadAvg; |
||||
this.reservedMemory = reservedMemory; |
||||
} |
||||
|
||||
public HeartBeat(long startupTime, double maxCpuloadAvg, double reservedMemory, int hostWeight, int workerExecThreadCount) { |
||||
this.reportTime = System.currentTimeMillis(); |
||||
this.serverStatus = Constants.NORMAL_NODE_STATUS; |
||||
this.startupTime = startupTime; |
||||
this.maxCpuloadAvg = maxCpuloadAvg; |
||||
this.reservedMemory = reservedMemory; |
||||
this.workerHostWeight = hostWeight; |
||||
this.workerExecThreadCount = workerExecThreadCount; |
||||
} |
||||
|
||||
/** |
||||
* fill system info |
||||
*/ |
||||
private void fillSystemInfo() { |
||||
this.cpuUsage = OSUtils.cpuUsage(); |
||||
this.loadAverage = OSUtils.loadAverage(); |
||||
this.availablePhysicalMemorySize = OSUtils.availablePhysicalMemorySize(); |
||||
this.memoryUsage = OSUtils.memoryUsage(); |
||||
this.diskAvailable = OSUtils.diskAvailable(); |
||||
this.processId = OSUtils.getProcessID(); |
||||
} |
||||
|
||||
/** |
||||
* update server state |
||||
*/ |
||||
public void updateServerState() { |
||||
this.reportTime = System.currentTimeMillis(); |
||||
if (loadAverage > maxCpuloadAvg || availablePhysicalMemorySize < reservedMemory) { |
||||
logger.warn("current cpu load average {} is too high or available memory {}G is too low, under max.cpuload.avg={} and reserved.memory={}G", |
||||
loadAverage, availablePhysicalMemorySize, maxCpuloadAvg, reservedMemory); |
||||
this.serverStatus = Constants.ABNORMAL_NODE_STATUS; |
||||
} else if (workerWaitingTaskCount > workerExecThreadCount) { |
||||
logger.warn("current waiting task count {} is large than worker thread count {}, worker is busy", workerWaitingTaskCount, workerExecThreadCount); |
||||
this.serverStatus = Constants.BUSY_NODE_STATUE; |
||||
} else { |
||||
this.serverStatus = Constants.NORMAL_NODE_STATUS; |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* encode heartbeat |
||||
*/ |
||||
public String encodeHeartBeat() { |
||||
this.fillSystemInfo(); |
||||
this.updateServerState(); |
||||
|
||||
StringBuilder builder = new StringBuilder(100); |
||||
builder.append(cpuUsage).append(Constants.COMMA); |
||||
builder.append(memoryUsage).append(Constants.COMMA); |
||||
builder.append(loadAverage).append(Constants.COMMA); |
||||
builder.append(availablePhysicalMemorySize).append(Constants.COMMA); |
||||
builder.append(maxCpuloadAvg).append(Constants.COMMA); |
||||
builder.append(reservedMemory).append(Constants.COMMA); |
||||
builder.append(startupTime).append(Constants.COMMA); |
||||
builder.append(reportTime).append(Constants.COMMA); |
||||
builder.append(serverStatus).append(Constants.COMMA); |
||||
builder.append(processId).append(Constants.COMMA); |
||||
builder.append(workerHostWeight).append(Constants.COMMA); |
||||
builder.append(workerExecThreadCount).append(Constants.COMMA); |
||||
builder.append(workerWaitingTaskCount).append(Constants.COMMA); |
||||
builder.append(diskAvailable); |
||||
|
||||
return builder.toString(); |
||||
} |
||||
|
||||
/** |
||||
* decode heartbeat |
||||
*/ |
||||
public static HeartBeat decodeHeartBeat(String heartBeatInfo) { |
||||
String[] parts = heartBeatInfo.split(Constants.COMMA); |
||||
if (parts.length != Constants.HEARTBEAT_FOR_ZOOKEEPER_INFO_LENGTH) { |
||||
return null; |
||||
} |
||||
HeartBeat heartBeat = new HeartBeat(); |
||||
heartBeat.cpuUsage = Double.parseDouble(parts[0]); |
||||
heartBeat.memoryUsage = Double.parseDouble(parts[1]); |
||||
heartBeat.loadAverage = Double.parseDouble(parts[2]); |
||||
heartBeat.availablePhysicalMemorySize = Double.parseDouble(parts[3]); |
||||
heartBeat.maxCpuloadAvg = Double.parseDouble(parts[4]); |
||||
heartBeat.reservedMemory = Double.parseDouble(parts[5]); |
||||
heartBeat.startupTime = Long.parseLong(parts[6]); |
||||
heartBeat.reportTime = Long.parseLong(parts[7]); |
||||
heartBeat.serverStatus = Integer.parseInt(parts[8]); |
||||
heartBeat.processId = Integer.parseInt(parts[9]); |
||||
heartBeat.workerHostWeight = Integer.parseInt(parts[10]); |
||||
heartBeat.workerExecThreadCount = Integer.parseInt(parts[11]); |
||||
heartBeat.workerWaitingTaskCount = Integer.parseInt(parts[12]); |
||||
heartBeat.diskAvailable = Double.parseDouble(parts[13]); |
||||
return heartBeat; |
||||
} |
||||
} |
@ -1,77 +0,0 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package org.apache.dolphinscheduler.common.utils; |
||||
|
||||
import static org.junit.Assert.assertEquals; |
||||
|
||||
import org.apache.dolphinscheduler.common.Constants; |
||||
|
||||
import org.junit.Test; |
||||
|
||||
/** |
||||
* NetUtilsTest |
||||
*/ |
||||
public class HeartBeatTest { |
||||
|
||||
@Test |
||||
public void testAbnormalState() { |
||||
long startupTime = System.currentTimeMillis(); |
||||
double loadAverage = 100; |
||||
double reservedMemory = 100; |
||||
HeartBeat heartBeat = new HeartBeat(startupTime, loadAverage, reservedMemory); |
||||
heartBeat.updateServerState(); |
||||
assertEquals(Constants.ABNORMAL_NODE_STATUS, heartBeat.getServerStatus()); |
||||
} |
||||
|
||||
@Test |
||||
public void testBusyState() { |
||||
long startupTime = System.currentTimeMillis(); |
||||
double loadAverage = 0; |
||||
double reservedMemory = 0; |
||||
int hostWeight = 1; |
||||
int taskCount = 200; |
||||
int workerThreadCount = 199; |
||||
HeartBeat heartBeat = new HeartBeat(startupTime, loadAverage, reservedMemory, hostWeight, workerThreadCount); |
||||
|
||||
heartBeat.setWorkerWaitingTaskCount(taskCount); |
||||
heartBeat.updateServerState(); |
||||
assertEquals(Constants.BUSY_NODE_STATUE, heartBeat.getServerStatus()); |
||||
} |
||||
|
||||
@Test |
||||
public void testDecodeHeartBeat() throws Exception { |
||||
String heartBeatInfo = "0.35,0.58,3.09,6.47,5.0,1.0,1634033006749,1634033006857,1,29732,1,199,200,65.86"; |
||||
HeartBeat heartBeat = HeartBeat.decodeHeartBeat(heartBeatInfo); |
||||
|
||||
double delta = 0.001; |
||||
assertEquals(0.35, heartBeat.getCpuUsage(), delta); |
||||
assertEquals(0.58, heartBeat.getMemoryUsage(), delta); |
||||
assertEquals(3.09, heartBeat.getLoadAverage(), delta); |
||||
assertEquals(6.47, heartBeat.getAvailablePhysicalMemorySize(), delta); |
||||
assertEquals(5.0, heartBeat.getMaxCpuloadAvg(), delta); |
||||
assertEquals(1.0, heartBeat.getReservedMemory(), delta); |
||||
assertEquals(1634033006749L, heartBeat.getStartupTime()); |
||||
assertEquals(1634033006857L, heartBeat.getReportTime()); |
||||
assertEquals(1, heartBeat.getServerStatus()); |
||||
assertEquals(29732, heartBeat.getProcessId()); |
||||
assertEquals(199, heartBeat.getWorkerExecThreadCount()); |
||||
assertEquals(200, heartBeat.getWorkerWaitingTaskCount()); |
||||
assertEquals(65.86, heartBeat.getDiskAvailable(), delta); |
||||
} |
||||
|
||||
} |
@ -1,70 +0,0 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package org.apache.dolphinscheduler.server.master.registry; |
||||
|
||||
import org.apache.dolphinscheduler.common.lifecycle.ServerLifeCycleManager; |
||||
import org.apache.dolphinscheduler.common.utils.HeartBeat; |
||||
import org.apache.dolphinscheduler.service.registry.RegistryClient; |
||||
|
||||
import java.util.Set; |
||||
import java.util.concurrent.atomic.AtomicInteger; |
||||
|
||||
import org.slf4j.Logger; |
||||
import org.slf4j.LoggerFactory; |
||||
|
||||
/** |
||||
* Master heart beat task |
||||
*/ |
||||
public class MasterHeartBeatTask implements Runnable { |
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(MasterHeartBeatTask.class); |
||||
|
||||
private final Set<String> heartBeatPaths; |
||||
private final RegistryClient registryClient; |
||||
private final HeartBeat heartBeat; |
||||
private final AtomicInteger heartBeatErrorTimes = new AtomicInteger(); |
||||
|
||||
public MasterHeartBeatTask(long startupTime, |
||||
double maxCpuloadAvg, |
||||
double reservedMemory, |
||||
Set<String> heartBeatPaths, |
||||
RegistryClient registryClient) { |
||||
this.heartBeatPaths = heartBeatPaths; |
||||
this.registryClient = registryClient; |
||||
this.heartBeat = new HeartBeat(startupTime, maxCpuloadAvg, reservedMemory); |
||||
} |
||||
|
||||
public String getHeartBeatInfo() { |
||||
return this.heartBeat.encodeHeartBeat(); |
||||
} |
||||
|
||||
@Override |
||||
public void run() { |
||||
try { |
||||
if (!ServerLifeCycleManager.isRunning()) { |
||||
return; |
||||
} |
||||
for (String heartBeatPath : heartBeatPaths) { |
||||
registryClient.persistEphemeral(heartBeatPath, heartBeat.encodeHeartBeat()); |
||||
} |
||||
heartBeatErrorTimes.set(0); |
||||
} catch (Throwable ex) { |
||||
logger.error("HeartBeat task execute failed, errorTimes: {}", heartBeatErrorTimes.get(), ex); |
||||
} |
||||
} |
||||
} |
@ -0,0 +1,71 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package org.apache.dolphinscheduler.server.master.task; |
||||
|
||||
import lombok.NonNull; |
||||
import lombok.extern.slf4j.Slf4j; |
||||
import org.apache.dolphinscheduler.common.lifecycle.ServerLifeCycleManager; |
||||
import org.apache.dolphinscheduler.common.model.BaseHeartBeatTask; |
||||
import org.apache.dolphinscheduler.common.model.MasterHeartBeat; |
||||
import org.apache.dolphinscheduler.common.utils.JSONUtils; |
||||
import org.apache.dolphinscheduler.common.utils.OSUtils; |
||||
import org.apache.dolphinscheduler.server.master.config.MasterConfig; |
||||
import org.apache.dolphinscheduler.service.registry.RegistryClient; |
||||
|
||||
@Slf4j |
||||
public class MasterHeartBeatTask extends BaseHeartBeatTask<MasterHeartBeat> { |
||||
|
||||
private final MasterConfig masterConfig; |
||||
|
||||
private final RegistryClient registryClient; |
||||
|
||||
private final String heartBeatPath; |
||||
|
||||
private final int processId; |
||||
|
||||
public MasterHeartBeatTask(@NonNull MasterConfig masterConfig, |
||||
@NonNull RegistryClient registryClient) { |
||||
super("MasterHeartBeatTask", masterConfig.getHeartbeatInterval().toMillis()); |
||||
this.masterConfig = masterConfig; |
||||
this.registryClient = registryClient; |
||||
this.heartBeatPath = masterConfig.getMasterRegistryPath(); |
||||
this.processId = OSUtils.getProcessID(); |
||||
} |
||||
|
||||
@Override |
||||
public MasterHeartBeat getHeartBeat() { |
||||
return MasterHeartBeat.builder() |
||||
.startupTime(ServerLifeCycleManager.getServerStartupTime()) |
||||
.reportTime(System.currentTimeMillis()) |
||||
.cpuUsage(OSUtils.cpuUsage()) |
||||
.loadAverage(OSUtils.loadAverage()) |
||||
.availablePhysicalMemorySize(OSUtils.availablePhysicalMemorySize()) |
||||
.maxCpuloadAvg(masterConfig.getMaxCpuLoadAvg()) |
||||
.reservedMemory(masterConfig.getReservedMemory()) |
||||
.processId(processId) |
||||
.build(); |
||||
} |
||||
|
||||
@Override |
||||
public void writeHeartBeat(MasterHeartBeat masterHeartBeat) { |
||||
String masterHeartBeatJson = JSONUtils.toJsonString(masterHeartBeat); |
||||
registryClient.persistEphemeral(heartBeatPath, masterHeartBeatJson); |
||||
log.info("Success write master heartBeatInfo into registry, masterRegistryPath: {}, heartBeatInfo: {}", |
||||
heartBeatPath, masterHeartBeatJson); |
||||
} |
||||
} |
@ -1,79 +0,0 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package org.apache.dolphinscheduler.server.worker.registry; |
||||
|
||||
import org.apache.dolphinscheduler.common.lifecycle.ServerLifeCycleManager; |
||||
import org.apache.dolphinscheduler.common.utils.HeartBeat; |
||||
import org.apache.dolphinscheduler.service.registry.RegistryClient; |
||||
import org.slf4j.Logger; |
||||
import org.slf4j.LoggerFactory; |
||||
|
||||
import java.util.Set; |
||||
import java.util.concurrent.atomic.AtomicInteger; |
||||
|
||||
/** |
||||
* Heart beat task |
||||
*/ |
||||
public class WorkerHeartBeatTask implements Runnable { |
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(WorkerHeartBeatTask.class); |
||||
|
||||
private final Set<String> heartBeatPaths; |
||||
private final RegistryClient registryClient; |
||||
private int workerWaitingTaskCount; |
||||
private final HeartBeat heartBeat; |
||||
|
||||
private final AtomicInteger heartBeatErrorTimes = new AtomicInteger(); |
||||
|
||||
public WorkerHeartBeatTask(long startupTime, |
||||
double maxCpuloadAvg, |
||||
double reservedMemory, |
||||
int hostWeight, |
||||
Set<String> heartBeatPaths, |
||||
RegistryClient registryClient, |
||||
int workerThreadCount, |
||||
int workerWaitingTaskCount) { |
||||
this.heartBeatPaths = heartBeatPaths; |
||||
this.registryClient = registryClient; |
||||
this.workerWaitingTaskCount = workerWaitingTaskCount; |
||||
this.heartBeat = new HeartBeat(startupTime, maxCpuloadAvg, reservedMemory, hostWeight, workerThreadCount); |
||||
} |
||||
|
||||
public String getHeartBeatInfo() { |
||||
return this.heartBeat.encodeHeartBeat(); |
||||
} |
||||
|
||||
@Override |
||||
public void run() { |
||||
try { |
||||
if (!ServerLifeCycleManager.isRunning()) { |
||||
return; |
||||
} |
||||
heartBeat.setStartupTime(ServerLifeCycleManager.getServerStartupTime()); |
||||
// update waiting task count
|
||||
heartBeat.setWorkerWaitingTaskCount(workerWaitingTaskCount); |
||||
|
||||
for (String heartBeatPath : heartBeatPaths) { |
||||
registryClient.persistEphemeral(heartBeatPath, heartBeat.encodeHeartBeat()); |
||||
} |
||||
heartBeatErrorTimes.set(0); |
||||
} catch (Throwable ex) { |
||||
logger.error("HeartBeat task execute failed, errorTimes: {}", heartBeatErrorTimes.get(), ex); |
||||
} |
||||
} |
||||
} |
@ -0,0 +1,107 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package org.apache.dolphinscheduler.server.worker.task; |
||||
|
||||
import lombok.NonNull; |
||||
import lombok.extern.slf4j.Slf4j; |
||||
import org.apache.dolphinscheduler.common.Constants; |
||||
import org.apache.dolphinscheduler.common.lifecycle.ServerLifeCycleManager; |
||||
import org.apache.dolphinscheduler.common.model.BaseHeartBeatTask; |
||||
import org.apache.dolphinscheduler.common.model.WorkerHeartBeat; |
||||
import org.apache.dolphinscheduler.common.utils.JSONUtils; |
||||
import org.apache.dolphinscheduler.common.utils.OSUtils; |
||||
import org.apache.dolphinscheduler.server.worker.config.WorkerConfig; |
||||
import org.apache.dolphinscheduler.service.registry.RegistryClient; |
||||
|
||||
import java.util.function.Supplier; |
||||
|
||||
@Slf4j |
||||
public class WorkerHeartBeatTask extends BaseHeartBeatTask<WorkerHeartBeat> { |
||||
|
||||
private final WorkerConfig workerConfig; |
||||
private final RegistryClient registryClient; |
||||
|
||||
private final Supplier<Integer> workerWaitingTaskCount; |
||||
|
||||
private final int processId; |
||||
|
||||
public WorkerHeartBeatTask(@NonNull WorkerConfig workerConfig, |
||||
@NonNull RegistryClient registryClient, |
||||
@NonNull Supplier<Integer> workerWaitingTaskCount) { |
||||
super("WorkerHeartBeatTask", workerConfig.getHeartbeatInterval().toMillis()); |
||||
this.workerConfig = workerConfig; |
||||
this.registryClient = registryClient; |
||||
this.workerWaitingTaskCount = workerWaitingTaskCount; |
||||
this.processId = OSUtils.getProcessID(); |
||||
} |
||||
|
||||
@Override |
||||
public WorkerHeartBeat getHeartBeat() { |
||||
double loadAverage = OSUtils.loadAverage(); |
||||
double cpuUsage = OSUtils.cpuUsage(); |
||||
int maxCpuLoadAvg = workerConfig.getMaxCpuLoadAvg(); |
||||
double reservedMemory = workerConfig.getReservedMemory(); |
||||
double availablePhysicalMemorySize = OSUtils.availablePhysicalMemorySize(); |
||||
int execThreads = workerConfig.getExecThreads(); |
||||
int workerWaitingTaskCount = this.workerWaitingTaskCount.get(); |
||||
int serverStatus = getServerStatus(loadAverage, maxCpuLoadAvg, availablePhysicalMemorySize, reservedMemory, execThreads, workerWaitingTaskCount); |
||||
|
||||
return WorkerHeartBeat.builder() |
||||
.startupTime(ServerLifeCycleManager.getServerStartupTime()) |
||||
.reportTime(System.currentTimeMillis()) |
||||
.cpuUsage(cpuUsage) |
||||
.loadAverage(loadAverage) |
||||
.availablePhysicalMemorySize(availablePhysicalMemorySize) |
||||
.maxCpuloadAvg(maxCpuLoadAvg) |
||||
.reservedMemory(reservedMemory) |
||||
.processId(processId) |
||||
.workerHostWeight(workerConfig.getHostWeight()) |
||||
.workerWaitingTaskCount(this.workerWaitingTaskCount.get()) |
||||
.workerExecThreadCount(workerConfig.getExecThreads()) |
||||
.serverStatus(serverStatus) |
||||
.build(); |
||||
} |
||||
|
||||
@Override |
||||
public void writeHeartBeat(WorkerHeartBeat workerHeartBeat) { |
||||
String workerHeartBeatJson = JSONUtils.toJsonString(workerHeartBeat); |
||||
for (String workerGroupRegistryPath : workerConfig.getWorkerGroupRegistryPaths()) { |
||||
registryClient.persistEphemeral(workerGroupRegistryPath, workerHeartBeatJson); |
||||
} |
||||
log.info("Success write worker group heartBeatInfo into registry, workGroupPath: {} workerHeartBeatInfo: {}", |
||||
workerConfig.getWorkerGroupRegistryPaths(), workerHeartBeatJson); |
||||
} |
||||
|
||||
public int getServerStatus(double loadAverage, |
||||
double maxCpuloadAvg, |
||||
double availablePhysicalMemorySize, |
||||
double reservedMemory, |
||||
int workerExecThreadCount, |
||||
int workerWaitingTaskCount) { |
||||
if (loadAverage > maxCpuloadAvg || availablePhysicalMemorySize < reservedMemory) { |
||||
log.warn("current cpu load average {} is too high or available memory {}G is too low, under max.cpuload.avg={} and reserved.memory={}G", |
||||
loadAverage, availablePhysicalMemorySize, maxCpuloadAvg, reservedMemory); |
||||
return Constants.ABNORMAL_NODE_STATUS; |
||||
} else if (workerWaitingTaskCount > workerExecThreadCount) { |
||||
log.warn("current waiting task count {} is large than worker thread count {}, worker is busy", workerWaitingTaskCount, workerExecThreadCount); |
||||
return Constants.BUSY_NODE_STATUE; |
||||
} else { |
||||
return Constants.NORMAL_NODE_STATUS; |
||||
} |
||||
} |
||||
} |
Loading…
Reference in new issue