Wenjun Ruan
2 years ago
committed by
GitHub
25 changed files with 583 additions and 886 deletions
@ -0,0 +1,81 @@ |
|||||||
|
/* |
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||||
|
* contributor license agreements. See the NOTICE file distributed with |
||||||
|
* this work for additional information regarding copyright ownership. |
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||||
|
* (the "License"); you may not use this file except in compliance with |
||||||
|
* the License. You may obtain a copy of the License at |
||||||
|
* |
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* |
||||||
|
* Unless required by applicable law or agreed to in writing, software |
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
* See the License for the specific language governing permissions and |
||||||
|
* limitations under the License. |
||||||
|
*/ |
||||||
|
|
||||||
|
package org.apache.dolphinscheduler.common.model; |
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j; |
||||||
|
import org.apache.dolphinscheduler.common.lifecycle.ServerLifeCycleManager; |
||||||
|
import org.apache.dolphinscheduler.common.thread.BaseDaemonThread; |
||||||
|
|
||||||
|
@Slf4j |
||||||
|
public abstract class BaseHeartBeatTask<T> extends BaseDaemonThread { |
||||||
|
|
||||||
|
private final String threadName; |
||||||
|
private final long heartBeatInterval; |
||||||
|
|
||||||
|
protected boolean runningFlag; |
||||||
|
|
||||||
|
public BaseHeartBeatTask(String threadName, long heartBeatInterval) { |
||||||
|
super(threadName); |
||||||
|
this.threadName = threadName; |
||||||
|
this.heartBeatInterval = heartBeatInterval; |
||||||
|
this.runningFlag = true; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public synchronized void start() { |
||||||
|
log.info("Starting {}", threadName); |
||||||
|
super.start(); |
||||||
|
log.info("Started {}, heartBeatInterval: {}", threadName, heartBeatInterval); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public void run() { |
||||||
|
while (runningFlag) { |
||||||
|
try { |
||||||
|
if (!ServerLifeCycleManager.isRunning()) { |
||||||
|
log.info("The current server status is {}, will not write heartBeatInfo into registry", ServerLifeCycleManager.getServerStatus()); |
||||||
|
continue; |
||||||
|
} |
||||||
|
T heartBeat = getHeartBeat(); |
||||||
|
writeHeartBeat(heartBeat); |
||||||
|
} catch (Exception ex) { |
||||||
|
log.error("{} task execute failed", threadName, ex); |
||||||
|
} finally { |
||||||
|
try { |
||||||
|
Thread.sleep(heartBeatInterval); |
||||||
|
} catch (InterruptedException e) { |
||||||
|
handleInterruptException(e); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
public void shutdown() { |
||||||
|
log.warn("{} task finished", threadName); |
||||||
|
runningFlag = false; |
||||||
|
} |
||||||
|
|
||||||
|
private void handleInterruptException(InterruptedException ex) { |
||||||
|
log.warn("{} has been interrupted", threadName, ex); |
||||||
|
Thread.currentThread().interrupt(); |
||||||
|
} |
||||||
|
|
||||||
|
public abstract T getHeartBeat(); |
||||||
|
|
||||||
|
public abstract void writeHeartBeat(T heartBeat); |
||||||
|
} |
@ -0,0 +1,21 @@ |
|||||||
|
/* |
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||||
|
* contributor license agreements. See the NOTICE file distributed with |
||||||
|
* this work for additional information regarding copyright ownership. |
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||||
|
* (the "License"); you may not use this file except in compliance with |
||||||
|
* the License. You may obtain a copy of the License at |
||||||
|
* |
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* |
||||||
|
* Unless required by applicable law or agreed to in writing, software |
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
* See the License for the specific language governing permissions and |
||||||
|
* limitations under the License. |
||||||
|
*/ |
||||||
|
|
||||||
|
package org.apache.dolphinscheduler.common.model; |
||||||
|
|
||||||
|
/**
 * Marker type for heartbeat payloads. It declares no members; concrete payloads such as
 * {@code MasterHeartBeat} and {@code WorkerHeartBeat} implement it.
 */
public interface HeartBeat {
}
@ -0,0 +1,39 @@ |
|||||||
|
/* |
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||||
|
* contributor license agreements. See the NOTICE file distributed with |
||||||
|
* this work for additional information regarding copyright ownership. |
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||||
|
* (the "License"); you may not use this file except in compliance with |
||||||
|
* the License. You may obtain a copy of the License at |
||||||
|
* |
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* |
||||||
|
* Unless required by applicable law or agreed to in writing, software |
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
* See the License for the specific language governing permissions and |
||||||
|
* limitations under the License. |
||||||
|
*/ |
||||||
|
|
||||||
|
package org.apache.dolphinscheduler.common.model; |
||||||
|
|
||||||
|
import lombok.AllArgsConstructor; |
||||||
|
import lombok.Builder; |
||||||
|
import lombok.Data; |
||||||
|
import lombok.NoArgsConstructor; |
||||||
|
|
||||||
|
@Data |
||||||
|
@Builder |
||||||
|
@NoArgsConstructor |
||||||
|
@AllArgsConstructor |
||||||
|
public class MasterHeartBeat implements HeartBeat { |
||||||
|
private long startupTime; |
||||||
|
private long reportTime; |
||||||
|
private double cpuUsage; |
||||||
|
private double memoryUsage; |
||||||
|
private double loadAverage; |
||||||
|
private double availablePhysicalMemorySize; |
||||||
|
private double maxCpuloadAvg; |
||||||
|
private double reservedMemory; |
||||||
|
private int processId; |
||||||
|
} |
@ -0,0 +1,47 @@ |
|||||||
|
/* |
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||||
|
* contributor license agreements. See the NOTICE file distributed with |
||||||
|
* this work for additional information regarding copyright ownership. |
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||||
|
* (the "License"); you may not use this file except in compliance with |
||||||
|
* the License. You may obtain a copy of the License at |
||||||
|
* |
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* |
||||||
|
* Unless required by applicable law or agreed to in writing, software |
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
* See the License for the specific language governing permissions and |
||||||
|
* limitations under the License. |
||||||
|
*/ |
||||||
|
|
||||||
|
package org.apache.dolphinscheduler.common.model; |
||||||
|
|
||||||
|
import lombok.AllArgsConstructor; |
||||||
|
import lombok.Builder; |
||||||
|
import lombok.Data; |
||||||
|
import lombok.NoArgsConstructor; |
||||||
|
|
||||||
|
@Data |
||||||
|
@Builder |
||||||
|
@NoArgsConstructor |
||||||
|
@AllArgsConstructor |
||||||
|
public class WorkerHeartBeat implements HeartBeat { |
||||||
|
|
||||||
|
private long startupTime; |
||||||
|
private long reportTime; |
||||||
|
private double cpuUsage; |
||||||
|
private double memoryUsage; |
||||||
|
private double loadAverage; |
||||||
|
private double availablePhysicalMemorySize; |
||||||
|
private double maxCpuloadAvg; |
||||||
|
private double reservedMemory; |
||||||
|
private int serverStatus; |
||||||
|
private int processId; |
||||||
|
|
||||||
|
private int workerHostWeight; // worker host weight
|
||||||
|
private int workerWaitingTaskCount; // worker waiting task count
|
||||||
|
private int workerExecThreadCount; // worker thread pool thread count
|
||||||
|
|
||||||
|
|
||||||
|
} |
@ -1,261 +0,0 @@ |
|||||||
/* |
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
|
||||||
* contributor license agreements. See the NOTICE file distributed with |
|
||||||
* this work for additional information regarding copyright ownership. |
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
|
||||||
* (the "License"); you may not use this file except in compliance with |
|
||||||
* the License. You may obtain a copy of the License at |
|
||||||
* |
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
* |
|
||||||
* Unless required by applicable law or agreed to in writing, software |
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS, |
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
||||||
* See the License for the specific language governing permissions and |
|
||||||
* limitations under the License. |
|
||||||
*/ |
|
||||||
|
|
||||||
package org.apache.dolphinscheduler.common.utils; |
|
||||||
|
|
||||||
import org.apache.dolphinscheduler.common.Constants; |
|
||||||
|
|
||||||
import org.slf4j.Logger; |
|
||||||
import org.slf4j.LoggerFactory; |
|
||||||
|
|
||||||
public class HeartBeat { |
|
||||||
|
|
||||||
private static final Logger logger = LoggerFactory.getLogger(HeartBeat.class); |
|
||||||
|
|
||||||
private long startupTime; |
|
||||||
private long reportTime; |
|
||||||
private double cpuUsage; |
|
||||||
private double memoryUsage; |
|
||||||
private double loadAverage; |
|
||||||
private double availablePhysicalMemorySize; |
|
||||||
private double maxCpuloadAvg; |
|
||||||
private double reservedMemory; |
|
||||||
private int serverStatus; |
|
||||||
private int processId; |
|
||||||
|
|
||||||
private int workerHostWeight; // worker host weight
|
|
||||||
private int workerWaitingTaskCount; // worker waiting task count
|
|
||||||
private int workerExecThreadCount; // worker thread pool thread count
|
|
||||||
|
|
||||||
private double diskAvailable; |
|
||||||
|
|
||||||
public double getDiskAvailable() { |
|
||||||
return diskAvailable; |
|
||||||
} |
|
||||||
|
|
||||||
public void setDiskAvailable(double diskAvailable) { |
|
||||||
this.diskAvailable = diskAvailable; |
|
||||||
} |
|
||||||
|
|
||||||
public long getStartupTime() { |
|
||||||
return startupTime; |
|
||||||
} |
|
||||||
|
|
||||||
public void setStartupTime(long startupTime) { |
|
||||||
this.startupTime = startupTime; |
|
||||||
} |
|
||||||
|
|
||||||
public long getReportTime() { |
|
||||||
return reportTime; |
|
||||||
} |
|
||||||
|
|
||||||
public void setReportTime(long reportTime) { |
|
||||||
this.reportTime = reportTime; |
|
||||||
} |
|
||||||
|
|
||||||
public double getCpuUsage() { |
|
||||||
return cpuUsage; |
|
||||||
} |
|
||||||
|
|
||||||
public void setCpuUsage(double cpuUsage) { |
|
||||||
this.cpuUsage = cpuUsage; |
|
||||||
} |
|
||||||
|
|
||||||
public double getMemoryUsage() { |
|
||||||
return memoryUsage; |
|
||||||
} |
|
||||||
|
|
||||||
public void setMemoryUsage(double memoryUsage) { |
|
||||||
this.memoryUsage = memoryUsage; |
|
||||||
} |
|
||||||
|
|
||||||
public double getLoadAverage() { |
|
||||||
return loadAverage; |
|
||||||
} |
|
||||||
|
|
||||||
public void setLoadAverage(double loadAverage) { |
|
||||||
this.loadAverage = loadAverage; |
|
||||||
} |
|
||||||
|
|
||||||
public double getAvailablePhysicalMemorySize() { |
|
||||||
return availablePhysicalMemorySize; |
|
||||||
} |
|
||||||
|
|
||||||
public void setAvailablePhysicalMemorySize(double availablePhysicalMemorySize) { |
|
||||||
this.availablePhysicalMemorySize = availablePhysicalMemorySize; |
|
||||||
} |
|
||||||
|
|
||||||
public double getMaxCpuloadAvg() { |
|
||||||
return maxCpuloadAvg; |
|
||||||
} |
|
||||||
|
|
||||||
public void setMaxCpuloadAvg(double maxCpuloadAvg) { |
|
||||||
this.maxCpuloadAvg = maxCpuloadAvg; |
|
||||||
} |
|
||||||
|
|
||||||
public double getReservedMemory() { |
|
||||||
return reservedMemory; |
|
||||||
} |
|
||||||
|
|
||||||
public void setReservedMemory(double reservedMemory) { |
|
||||||
this.reservedMemory = reservedMemory; |
|
||||||
} |
|
||||||
|
|
||||||
public int getServerStatus() { |
|
||||||
return serverStatus; |
|
||||||
} |
|
||||||
|
|
||||||
public void setServerStatus(int serverStatus) { |
|
||||||
this.serverStatus = serverStatus; |
|
||||||
} |
|
||||||
|
|
||||||
public int getProcessId() { |
|
||||||
return processId; |
|
||||||
} |
|
||||||
|
|
||||||
public void setProcessId(int processId) { |
|
||||||
this.processId = processId; |
|
||||||
} |
|
||||||
|
|
||||||
public int getWorkerHostWeight() { |
|
||||||
return workerHostWeight; |
|
||||||
} |
|
||||||
|
|
||||||
public void setWorkerHostWeight(int workerHostWeight) { |
|
||||||
this.workerHostWeight = workerHostWeight; |
|
||||||
} |
|
||||||
|
|
||||||
public int getWorkerWaitingTaskCount() { |
|
||||||
return workerWaitingTaskCount; |
|
||||||
} |
|
||||||
|
|
||||||
public void setWorkerWaitingTaskCount(int workerWaitingTaskCount) { |
|
||||||
this.workerWaitingTaskCount = workerWaitingTaskCount; |
|
||||||
} |
|
||||||
|
|
||||||
public int getWorkerExecThreadCount() { |
|
||||||
return workerExecThreadCount; |
|
||||||
} |
|
||||||
|
|
||||||
public void setWorkerExecThreadCount(int workerExecThreadCount) { |
|
||||||
this.workerExecThreadCount = workerExecThreadCount; |
|
||||||
} |
|
||||||
|
|
||||||
public HeartBeat() { |
|
||||||
this.reportTime = System.currentTimeMillis(); |
|
||||||
this.serverStatus = Constants.NORMAL_NODE_STATUS; |
|
||||||
} |
|
||||||
|
|
||||||
public HeartBeat(long startupTime, double maxCpuloadAvg, double reservedMemory) { |
|
||||||
this.reportTime = System.currentTimeMillis(); |
|
||||||
this.serverStatus = Constants.NORMAL_NODE_STATUS; |
|
||||||
this.startupTime = startupTime; |
|
||||||
this.maxCpuloadAvg = maxCpuloadAvg; |
|
||||||
this.reservedMemory = reservedMemory; |
|
||||||
} |
|
||||||
|
|
||||||
public HeartBeat(long startupTime, double maxCpuloadAvg, double reservedMemory, int hostWeight, int workerExecThreadCount) { |
|
||||||
this.reportTime = System.currentTimeMillis(); |
|
||||||
this.serverStatus = Constants.NORMAL_NODE_STATUS; |
|
||||||
this.startupTime = startupTime; |
|
||||||
this.maxCpuloadAvg = maxCpuloadAvg; |
|
||||||
this.reservedMemory = reservedMemory; |
|
||||||
this.workerHostWeight = hostWeight; |
|
||||||
this.workerExecThreadCount = workerExecThreadCount; |
|
||||||
} |
|
||||||
|
|
||||||
/** |
|
||||||
* fill system info |
|
||||||
*/ |
|
||||||
private void fillSystemInfo() { |
|
||||||
this.cpuUsage = OSUtils.cpuUsage(); |
|
||||||
this.loadAverage = OSUtils.loadAverage(); |
|
||||||
this.availablePhysicalMemorySize = OSUtils.availablePhysicalMemorySize(); |
|
||||||
this.memoryUsage = OSUtils.memoryUsage(); |
|
||||||
this.diskAvailable = OSUtils.diskAvailable(); |
|
||||||
this.processId = OSUtils.getProcessID(); |
|
||||||
} |
|
||||||
|
|
||||||
/** |
|
||||||
* update server state |
|
||||||
*/ |
|
||||||
public void updateServerState() { |
|
||||||
this.reportTime = System.currentTimeMillis(); |
|
||||||
if (loadAverage > maxCpuloadAvg || availablePhysicalMemorySize < reservedMemory) { |
|
||||||
logger.warn("current cpu load average {} is too high or available memory {}G is too low, under max.cpuload.avg={} and reserved.memory={}G", |
|
||||||
loadAverage, availablePhysicalMemorySize, maxCpuloadAvg, reservedMemory); |
|
||||||
this.serverStatus = Constants.ABNORMAL_NODE_STATUS; |
|
||||||
} else if (workerWaitingTaskCount > workerExecThreadCount) { |
|
||||||
logger.warn("current waiting task count {} is large than worker thread count {}, worker is busy", workerWaitingTaskCount, workerExecThreadCount); |
|
||||||
this.serverStatus = Constants.BUSY_NODE_STATUE; |
|
||||||
} else { |
|
||||||
this.serverStatus = Constants.NORMAL_NODE_STATUS; |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/** |
|
||||||
* encode heartbeat |
|
||||||
*/ |
|
||||||
public String encodeHeartBeat() { |
|
||||||
this.fillSystemInfo(); |
|
||||||
this.updateServerState(); |
|
||||||
|
|
||||||
StringBuilder builder = new StringBuilder(100); |
|
||||||
builder.append(cpuUsage).append(Constants.COMMA); |
|
||||||
builder.append(memoryUsage).append(Constants.COMMA); |
|
||||||
builder.append(loadAverage).append(Constants.COMMA); |
|
||||||
builder.append(availablePhysicalMemorySize).append(Constants.COMMA); |
|
||||||
builder.append(maxCpuloadAvg).append(Constants.COMMA); |
|
||||||
builder.append(reservedMemory).append(Constants.COMMA); |
|
||||||
builder.append(startupTime).append(Constants.COMMA); |
|
||||||
builder.append(reportTime).append(Constants.COMMA); |
|
||||||
builder.append(serverStatus).append(Constants.COMMA); |
|
||||||
builder.append(processId).append(Constants.COMMA); |
|
||||||
builder.append(workerHostWeight).append(Constants.COMMA); |
|
||||||
builder.append(workerExecThreadCount).append(Constants.COMMA); |
|
||||||
builder.append(workerWaitingTaskCount).append(Constants.COMMA); |
|
||||||
builder.append(diskAvailable); |
|
||||||
|
|
||||||
return builder.toString(); |
|
||||||
} |
|
||||||
|
|
||||||
/** |
|
||||||
* decode heartbeat |
|
||||||
*/ |
|
||||||
public static HeartBeat decodeHeartBeat(String heartBeatInfo) { |
|
||||||
String[] parts = heartBeatInfo.split(Constants.COMMA); |
|
||||||
if (parts.length != Constants.HEARTBEAT_FOR_ZOOKEEPER_INFO_LENGTH) { |
|
||||||
return null; |
|
||||||
} |
|
||||||
HeartBeat heartBeat = new HeartBeat(); |
|
||||||
heartBeat.cpuUsage = Double.parseDouble(parts[0]); |
|
||||||
heartBeat.memoryUsage = Double.parseDouble(parts[1]); |
|
||||||
heartBeat.loadAverage = Double.parseDouble(parts[2]); |
|
||||||
heartBeat.availablePhysicalMemorySize = Double.parseDouble(parts[3]); |
|
||||||
heartBeat.maxCpuloadAvg = Double.parseDouble(parts[4]); |
|
||||||
heartBeat.reservedMemory = Double.parseDouble(parts[5]); |
|
||||||
heartBeat.startupTime = Long.parseLong(parts[6]); |
|
||||||
heartBeat.reportTime = Long.parseLong(parts[7]); |
|
||||||
heartBeat.serverStatus = Integer.parseInt(parts[8]); |
|
||||||
heartBeat.processId = Integer.parseInt(parts[9]); |
|
||||||
heartBeat.workerHostWeight = Integer.parseInt(parts[10]); |
|
||||||
heartBeat.workerExecThreadCount = Integer.parseInt(parts[11]); |
|
||||||
heartBeat.workerWaitingTaskCount = Integer.parseInt(parts[12]); |
|
||||||
heartBeat.diskAvailable = Double.parseDouble(parts[13]); |
|
||||||
return heartBeat; |
|
||||||
} |
|
||||||
} |
|
@ -1,77 +0,0 @@ |
|||||||
/* |
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
|
||||||
* contributor license agreements. See the NOTICE file distributed with |
|
||||||
* this work for additional information regarding copyright ownership. |
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
|
||||||
* (the "License"); you may not use this file except in compliance with |
|
||||||
* the License. You may obtain a copy of the License at |
|
||||||
* |
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
* |
|
||||||
* Unless required by applicable law or agreed to in writing, software |
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS, |
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
||||||
* See the License for the specific language governing permissions and |
|
||||||
* limitations under the License. |
|
||||||
*/ |
|
||||||
|
|
||||||
package org.apache.dolphinscheduler.common.utils; |
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals; |
|
||||||
|
|
||||||
import org.apache.dolphinscheduler.common.Constants; |
|
||||||
|
|
||||||
import org.junit.Test; |
|
||||||
|
|
||||||
/** |
|
||||||
* NetUtilsTest |
|
||||||
*/ |
|
||||||
public class HeartBeatTest { |
|
||||||
|
|
||||||
@Test |
|
||||||
public void testAbnormalState() { |
|
||||||
long startupTime = System.currentTimeMillis(); |
|
||||||
double loadAverage = 100; |
|
||||||
double reservedMemory = 100; |
|
||||||
HeartBeat heartBeat = new HeartBeat(startupTime, loadAverage, reservedMemory); |
|
||||||
heartBeat.updateServerState(); |
|
||||||
assertEquals(Constants.ABNORMAL_NODE_STATUS, heartBeat.getServerStatus()); |
|
||||||
} |
|
||||||
|
|
||||||
@Test |
|
||||||
public void testBusyState() { |
|
||||||
long startupTime = System.currentTimeMillis(); |
|
||||||
double loadAverage = 0; |
|
||||||
double reservedMemory = 0; |
|
||||||
int hostWeight = 1; |
|
||||||
int taskCount = 200; |
|
||||||
int workerThreadCount = 199; |
|
||||||
HeartBeat heartBeat = new HeartBeat(startupTime, loadAverage, reservedMemory, hostWeight, workerThreadCount); |
|
||||||
|
|
||||||
heartBeat.setWorkerWaitingTaskCount(taskCount); |
|
||||||
heartBeat.updateServerState(); |
|
||||||
assertEquals(Constants.BUSY_NODE_STATUE, heartBeat.getServerStatus()); |
|
||||||
} |
|
||||||
|
|
||||||
@Test |
|
||||||
public void testDecodeHeartBeat() throws Exception { |
|
||||||
String heartBeatInfo = "0.35,0.58,3.09,6.47,5.0,1.0,1634033006749,1634033006857,1,29732,1,199,200,65.86"; |
|
||||||
HeartBeat heartBeat = HeartBeat.decodeHeartBeat(heartBeatInfo); |
|
||||||
|
|
||||||
double delta = 0.001; |
|
||||||
assertEquals(0.35, heartBeat.getCpuUsage(), delta); |
|
||||||
assertEquals(0.58, heartBeat.getMemoryUsage(), delta); |
|
||||||
assertEquals(3.09, heartBeat.getLoadAverage(), delta); |
|
||||||
assertEquals(6.47, heartBeat.getAvailablePhysicalMemorySize(), delta); |
|
||||||
assertEquals(5.0, heartBeat.getMaxCpuloadAvg(), delta); |
|
||||||
assertEquals(1.0, heartBeat.getReservedMemory(), delta); |
|
||||||
assertEquals(1634033006749L, heartBeat.getStartupTime()); |
|
||||||
assertEquals(1634033006857L, heartBeat.getReportTime()); |
|
||||||
assertEquals(1, heartBeat.getServerStatus()); |
|
||||||
assertEquals(29732, heartBeat.getProcessId()); |
|
||||||
assertEquals(199, heartBeat.getWorkerExecThreadCount()); |
|
||||||
assertEquals(200, heartBeat.getWorkerWaitingTaskCount()); |
|
||||||
assertEquals(65.86, heartBeat.getDiskAvailable(), delta); |
|
||||||
} |
|
||||||
|
|
||||||
} |
|
@ -1,70 +0,0 @@ |
|||||||
/* |
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
|
||||||
* contributor license agreements. See the NOTICE file distributed with |
|
||||||
* this work for additional information regarding copyright ownership. |
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
|
||||||
* (the "License"); you may not use this file except in compliance with |
|
||||||
* the License. You may obtain a copy of the License at |
|
||||||
* |
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
* |
|
||||||
* Unless required by applicable law or agreed to in writing, software |
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS, |
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
||||||
* See the License for the specific language governing permissions and |
|
||||||
* limitations under the License. |
|
||||||
*/ |
|
||||||
|
|
||||||
package org.apache.dolphinscheduler.server.master.registry; |
|
||||||
|
|
||||||
import org.apache.dolphinscheduler.common.lifecycle.ServerLifeCycleManager; |
|
||||||
import org.apache.dolphinscheduler.common.utils.HeartBeat; |
|
||||||
import org.apache.dolphinscheduler.service.registry.RegistryClient; |
|
||||||
|
|
||||||
import java.util.Set; |
|
||||||
import java.util.concurrent.atomic.AtomicInteger; |
|
||||||
|
|
||||||
import org.slf4j.Logger; |
|
||||||
import org.slf4j.LoggerFactory; |
|
||||||
|
|
||||||
/** |
|
||||||
* Master heart beat task |
|
||||||
*/ |
|
||||||
public class MasterHeartBeatTask implements Runnable { |
|
||||||
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(MasterHeartBeatTask.class); |
|
||||||
|
|
||||||
private final Set<String> heartBeatPaths; |
|
||||||
private final RegistryClient registryClient; |
|
||||||
private final HeartBeat heartBeat; |
|
||||||
private final AtomicInteger heartBeatErrorTimes = new AtomicInteger(); |
|
||||||
|
|
||||||
public MasterHeartBeatTask(long startupTime, |
|
||||||
double maxCpuloadAvg, |
|
||||||
double reservedMemory, |
|
||||||
Set<String> heartBeatPaths, |
|
||||||
RegistryClient registryClient) { |
|
||||||
this.heartBeatPaths = heartBeatPaths; |
|
||||||
this.registryClient = registryClient; |
|
||||||
this.heartBeat = new HeartBeat(startupTime, maxCpuloadAvg, reservedMemory); |
|
||||||
} |
|
||||||
|
|
||||||
public String getHeartBeatInfo() { |
|
||||||
return this.heartBeat.encodeHeartBeat(); |
|
||||||
} |
|
||||||
|
|
||||||
@Override |
|
||||||
public void run() { |
|
||||||
try { |
|
||||||
if (!ServerLifeCycleManager.isRunning()) { |
|
||||||
return; |
|
||||||
} |
|
||||||
for (String heartBeatPath : heartBeatPaths) { |
|
||||||
registryClient.persistEphemeral(heartBeatPath, heartBeat.encodeHeartBeat()); |
|
||||||
} |
|
||||||
heartBeatErrorTimes.set(0); |
|
||||||
} catch (Throwable ex) { |
|
||||||
logger.error("HeartBeat task execute failed, errorTimes: {}", heartBeatErrorTimes.get(), ex); |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
@ -0,0 +1,71 @@ |
|||||||
|
/* |
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||||
|
* contributor license agreements. See the NOTICE file distributed with |
||||||
|
* this work for additional information regarding copyright ownership. |
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||||
|
* (the "License"); you may not use this file except in compliance with |
||||||
|
* the License. You may obtain a copy of the License at |
||||||
|
* |
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* |
||||||
|
* Unless required by applicable law or agreed to in writing, software |
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
* See the License for the specific language governing permissions and |
||||||
|
* limitations under the License. |
||||||
|
*/ |
||||||
|
|
||||||
|
package org.apache.dolphinscheduler.server.master.task; |
||||||
|
|
||||||
|
import lombok.NonNull; |
||||||
|
import lombok.extern.slf4j.Slf4j; |
||||||
|
import org.apache.dolphinscheduler.common.lifecycle.ServerLifeCycleManager; |
||||||
|
import org.apache.dolphinscheduler.common.model.BaseHeartBeatTask; |
||||||
|
import org.apache.dolphinscheduler.common.model.MasterHeartBeat; |
||||||
|
import org.apache.dolphinscheduler.common.utils.JSONUtils; |
||||||
|
import org.apache.dolphinscheduler.common.utils.OSUtils; |
||||||
|
import org.apache.dolphinscheduler.server.master.config.MasterConfig; |
||||||
|
import org.apache.dolphinscheduler.service.registry.RegistryClient; |
||||||
|
|
||||||
|
@Slf4j |
||||||
|
public class MasterHeartBeatTask extends BaseHeartBeatTask<MasterHeartBeat> { |
||||||
|
|
||||||
|
private final MasterConfig masterConfig; |
||||||
|
|
||||||
|
private final RegistryClient registryClient; |
||||||
|
|
||||||
|
private final String heartBeatPath; |
||||||
|
|
||||||
|
private final int processId; |
||||||
|
|
||||||
|
public MasterHeartBeatTask(@NonNull MasterConfig masterConfig, |
||||||
|
@NonNull RegistryClient registryClient) { |
||||||
|
super("MasterHeartBeatTask", masterConfig.getHeartbeatInterval().toMillis()); |
||||||
|
this.masterConfig = masterConfig; |
||||||
|
this.registryClient = registryClient; |
||||||
|
this.heartBeatPath = masterConfig.getMasterRegistryPath(); |
||||||
|
this.processId = OSUtils.getProcessID(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public MasterHeartBeat getHeartBeat() { |
||||||
|
return MasterHeartBeat.builder() |
||||||
|
.startupTime(ServerLifeCycleManager.getServerStartupTime()) |
||||||
|
.reportTime(System.currentTimeMillis()) |
||||||
|
.cpuUsage(OSUtils.cpuUsage()) |
||||||
|
.loadAverage(OSUtils.loadAverage()) |
||||||
|
.availablePhysicalMemorySize(OSUtils.availablePhysicalMemorySize()) |
||||||
|
.maxCpuloadAvg(masterConfig.getMaxCpuLoadAvg()) |
||||||
|
.reservedMemory(masterConfig.getReservedMemory()) |
||||||
|
.processId(processId) |
||||||
|
.build(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public void writeHeartBeat(MasterHeartBeat masterHeartBeat) { |
||||||
|
String masterHeartBeatJson = JSONUtils.toJsonString(masterHeartBeat); |
||||||
|
registryClient.persistEphemeral(heartBeatPath, masterHeartBeatJson); |
||||||
|
log.info("Success write master heartBeatInfo into registry, masterRegistryPath: {}, heartBeatInfo: {}", |
||||||
|
heartBeatPath, masterHeartBeatJson); |
||||||
|
} |
||||||
|
} |
@ -1,79 +0,0 @@ |
|||||||
/* |
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
|
||||||
* contributor license agreements. See the NOTICE file distributed with |
|
||||||
* this work for additional information regarding copyright ownership. |
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
|
||||||
* (the "License"); you may not use this file except in compliance with |
|
||||||
* the License. You may obtain a copy of the License at |
|
||||||
* |
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
* |
|
||||||
* Unless required by applicable law or agreed to in writing, software |
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS, |
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
||||||
* See the License for the specific language governing permissions and |
|
||||||
* limitations under the License. |
|
||||||
*/ |
|
||||||
|
|
||||||
package org.apache.dolphinscheduler.server.worker.registry; |
|
||||||
|
|
||||||
import org.apache.dolphinscheduler.common.lifecycle.ServerLifeCycleManager; |
|
||||||
import org.apache.dolphinscheduler.common.utils.HeartBeat; |
|
||||||
import org.apache.dolphinscheduler.service.registry.RegistryClient; |
|
||||||
import org.slf4j.Logger; |
|
||||||
import org.slf4j.LoggerFactory; |
|
||||||
|
|
||||||
import java.util.Set; |
|
||||||
import java.util.concurrent.atomic.AtomicInteger; |
|
||||||
|
|
||||||
/** |
|
||||||
* Heart beat task |
|
||||||
*/ |
|
||||||
public class WorkerHeartBeatTask implements Runnable { |
|
||||||
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(WorkerHeartBeatTask.class); |
|
||||||
|
|
||||||
private final Set<String> heartBeatPaths; |
|
||||||
private final RegistryClient registryClient; |
|
||||||
private int workerWaitingTaskCount; |
|
||||||
private final HeartBeat heartBeat; |
|
||||||
|
|
||||||
private final AtomicInteger heartBeatErrorTimes = new AtomicInteger(); |
|
||||||
|
|
||||||
public WorkerHeartBeatTask(long startupTime, |
|
||||||
double maxCpuloadAvg, |
|
||||||
double reservedMemory, |
|
||||||
int hostWeight, |
|
||||||
Set<String> heartBeatPaths, |
|
||||||
RegistryClient registryClient, |
|
||||||
int workerThreadCount, |
|
||||||
int workerWaitingTaskCount) { |
|
||||||
this.heartBeatPaths = heartBeatPaths; |
|
||||||
this.registryClient = registryClient; |
|
||||||
this.workerWaitingTaskCount = workerWaitingTaskCount; |
|
||||||
this.heartBeat = new HeartBeat(startupTime, maxCpuloadAvg, reservedMemory, hostWeight, workerThreadCount); |
|
||||||
} |
|
||||||
|
|
||||||
public String getHeartBeatInfo() { |
|
||||||
return this.heartBeat.encodeHeartBeat(); |
|
||||||
} |
|
||||||
|
|
||||||
@Override |
|
||||||
public void run() { |
|
||||||
try { |
|
||||||
if (!ServerLifeCycleManager.isRunning()) { |
|
||||||
return; |
|
||||||
} |
|
||||||
heartBeat.setStartupTime(ServerLifeCycleManager.getServerStartupTime()); |
|
||||||
// update waiting task count
|
|
||||||
heartBeat.setWorkerWaitingTaskCount(workerWaitingTaskCount); |
|
||||||
|
|
||||||
for (String heartBeatPath : heartBeatPaths) { |
|
||||||
registryClient.persistEphemeral(heartBeatPath, heartBeat.encodeHeartBeat()); |
|
||||||
} |
|
||||||
heartBeatErrorTimes.set(0); |
|
||||||
} catch (Throwable ex) { |
|
||||||
logger.error("HeartBeat task execute failed, errorTimes: {}", heartBeatErrorTimes.get(), ex); |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
@ -0,0 +1,107 @@ |
|||||||
|
/* |
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||||
|
* contributor license agreements. See the NOTICE file distributed with |
||||||
|
* this work for additional information regarding copyright ownership. |
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||||
|
* (the "License"); you may not use this file except in compliance with |
||||||
|
* the License. You may obtain a copy of the License at |
||||||
|
* |
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* |
||||||
|
* Unless required by applicable law or agreed to in writing, software |
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
* See the License for the specific language governing permissions and |
||||||
|
* limitations under the License. |
||||||
|
*/ |
||||||
|
|
||||||
|
package org.apache.dolphinscheduler.server.worker.task; |
||||||
|
|
||||||
|
import lombok.NonNull; |
||||||
|
import lombok.extern.slf4j.Slf4j; |
||||||
|
import org.apache.dolphinscheduler.common.Constants; |
||||||
|
import org.apache.dolphinscheduler.common.lifecycle.ServerLifeCycleManager; |
||||||
|
import org.apache.dolphinscheduler.common.model.BaseHeartBeatTask; |
||||||
|
import org.apache.dolphinscheduler.common.model.WorkerHeartBeat; |
||||||
|
import org.apache.dolphinscheduler.common.utils.JSONUtils; |
||||||
|
import org.apache.dolphinscheduler.common.utils.OSUtils; |
||||||
|
import org.apache.dolphinscheduler.server.worker.config.WorkerConfig; |
||||||
|
import org.apache.dolphinscheduler.service.registry.RegistryClient; |
||||||
|
|
||||||
|
import java.util.function.Supplier; |
||||||
|
|
||||||
|
@Slf4j |
||||||
|
public class WorkerHeartBeatTask extends BaseHeartBeatTask<WorkerHeartBeat> { |
||||||
|
|
||||||
|
private final WorkerConfig workerConfig; |
||||||
|
private final RegistryClient registryClient; |
||||||
|
|
||||||
|
private final Supplier<Integer> workerWaitingTaskCount; |
||||||
|
|
||||||
|
private final int processId; |
||||||
|
|
||||||
|
public WorkerHeartBeatTask(@NonNull WorkerConfig workerConfig, |
||||||
|
@NonNull RegistryClient registryClient, |
||||||
|
@NonNull Supplier<Integer> workerWaitingTaskCount) { |
||||||
|
super("WorkerHeartBeatTask", workerConfig.getHeartbeatInterval().toMillis()); |
||||||
|
this.workerConfig = workerConfig; |
||||||
|
this.registryClient = registryClient; |
||||||
|
this.workerWaitingTaskCount = workerWaitingTaskCount; |
||||||
|
this.processId = OSUtils.getProcessID(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public WorkerHeartBeat getHeartBeat() { |
||||||
|
double loadAverage = OSUtils.loadAverage(); |
||||||
|
double cpuUsage = OSUtils.cpuUsage(); |
||||||
|
int maxCpuLoadAvg = workerConfig.getMaxCpuLoadAvg(); |
||||||
|
double reservedMemory = workerConfig.getReservedMemory(); |
||||||
|
double availablePhysicalMemorySize = OSUtils.availablePhysicalMemorySize(); |
||||||
|
int execThreads = workerConfig.getExecThreads(); |
||||||
|
int workerWaitingTaskCount = this.workerWaitingTaskCount.get(); |
||||||
|
int serverStatus = getServerStatus(loadAverage, maxCpuLoadAvg, availablePhysicalMemorySize, reservedMemory, execThreads, workerWaitingTaskCount); |
||||||
|
|
||||||
|
return WorkerHeartBeat.builder() |
||||||
|
.startupTime(ServerLifeCycleManager.getServerStartupTime()) |
||||||
|
.reportTime(System.currentTimeMillis()) |
||||||
|
.cpuUsage(cpuUsage) |
||||||
|
.loadAverage(loadAverage) |
||||||
|
.availablePhysicalMemorySize(availablePhysicalMemorySize) |
||||||
|
.maxCpuloadAvg(maxCpuLoadAvg) |
||||||
|
.reservedMemory(reservedMemory) |
||||||
|
.processId(processId) |
||||||
|
.workerHostWeight(workerConfig.getHostWeight()) |
||||||
|
.workerWaitingTaskCount(this.workerWaitingTaskCount.get()) |
||||||
|
.workerExecThreadCount(workerConfig.getExecThreads()) |
||||||
|
.serverStatus(serverStatus) |
||||||
|
.build(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public void writeHeartBeat(WorkerHeartBeat workerHeartBeat) { |
||||||
|
String workerHeartBeatJson = JSONUtils.toJsonString(workerHeartBeat); |
||||||
|
for (String workerGroupRegistryPath : workerConfig.getWorkerGroupRegistryPaths()) { |
||||||
|
registryClient.persistEphemeral(workerGroupRegistryPath, workerHeartBeatJson); |
||||||
|
} |
||||||
|
log.info("Success write worker group heartBeatInfo into registry, workGroupPath: {} workerHeartBeatInfo: {}", |
||||||
|
workerConfig.getWorkerGroupRegistryPaths(), workerHeartBeatJson); |
||||||
|
} |
||||||
|
|
||||||
|
public int getServerStatus(double loadAverage, |
||||||
|
double maxCpuloadAvg, |
||||||
|
double availablePhysicalMemorySize, |
||||||
|
double reservedMemory, |
||||||
|
int workerExecThreadCount, |
||||||
|
int workerWaitingTaskCount) { |
||||||
|
if (loadAverage > maxCpuloadAvg || availablePhysicalMemorySize < reservedMemory) { |
||||||
|
log.warn("current cpu load average {} is too high or available memory {}G is too low, under max.cpuload.avg={} and reserved.memory={}G", |
||||||
|
loadAverage, availablePhysicalMemorySize, maxCpuloadAvg, reservedMemory); |
||||||
|
return Constants.ABNORMAL_NODE_STATUS; |
||||||
|
} else if (workerWaitingTaskCount > workerExecThreadCount) { |
||||||
|
log.warn("current waiting task count {} is large than worker thread count {}, worker is busy", workerWaitingTaskCount, workerExecThreadCount); |
||||||
|
return Constants.BUSY_NODE_STATUE; |
||||||
|
} else { |
||||||
|
return Constants.NORMAL_NODE_STATUS; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
Loading…
Reference in new issue