Browse Source
* [DS-6499][WorkerServer] report busy state when worker is overloaded (3.0.0/version-upgrade)
wind
3 years ago
committed by
GitHub
13 changed files with 432 additions and 243 deletions
@ -0,0 +1,248 @@ |
|||||||
|
/* |
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||||
|
* contributor license agreements. See the NOTICE file distributed with |
||||||
|
* this work for additional information regarding copyright ownership. |
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||||
|
* (the "License"); you may not use this file except in compliance with |
||||||
|
* the License. You may obtain a copy of the License at |
||||||
|
* |
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* |
||||||
|
* Unless required by applicable law or agreed to in writing, software |
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
* See the License for the specific language governing permissions and |
||||||
|
* limitations under the License. |
||||||
|
*/ |
||||||
|
|
||||||
|
package org.apache.dolphinscheduler.common.utils; |
||||||
|
|
||||||
|
import org.apache.dolphinscheduler.common.Constants; |
||||||
|
|
||||||
|
import org.slf4j.Logger; |
||||||
|
import org.slf4j.LoggerFactory; |
||||||
|
|
||||||
|
public class HeartBeat { |
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(HeartBeat.class); |
||||||
|
public static final String COMMA = ","; |
||||||
|
|
||||||
|
private long startupTime; |
||||||
|
private long reportTime; |
||||||
|
private double cpuUsage; |
||||||
|
private double memoryUsage; |
||||||
|
private double loadAverage; |
||||||
|
private double availablePhysicalMemorySize; |
||||||
|
private double maxCpuloadAvg; |
||||||
|
private double reservedMemory; |
||||||
|
private int serverStatus; |
||||||
|
private int processId; |
||||||
|
|
||||||
|
private int workerHostWeight; // worker host weight
|
||||||
|
private int workerWaitingTaskCount; // worker waiting task count
|
||||||
|
private int workerExecThreadCount; // worker thread pool thread count
|
||||||
|
|
||||||
|
public long getStartupTime() { |
||||||
|
return startupTime; |
||||||
|
} |
||||||
|
|
||||||
|
public void setStartupTime(long startupTime) { |
||||||
|
this.startupTime = startupTime; |
||||||
|
} |
||||||
|
|
||||||
|
public long getReportTime() { |
||||||
|
return reportTime; |
||||||
|
} |
||||||
|
|
||||||
|
public void setReportTime(long reportTime) { |
||||||
|
this.reportTime = reportTime; |
||||||
|
} |
||||||
|
|
||||||
|
public double getCpuUsage() { |
||||||
|
return cpuUsage; |
||||||
|
} |
||||||
|
|
||||||
|
public void setCpuUsage(double cpuUsage) { |
||||||
|
this.cpuUsage = cpuUsage; |
||||||
|
} |
||||||
|
|
||||||
|
public double getMemoryUsage() { |
||||||
|
return memoryUsage; |
||||||
|
} |
||||||
|
|
||||||
|
public void setMemoryUsage(double memoryUsage) { |
||||||
|
this.memoryUsage = memoryUsage; |
||||||
|
} |
||||||
|
|
||||||
|
public double getLoadAverage() { |
||||||
|
return loadAverage; |
||||||
|
} |
||||||
|
|
||||||
|
public void setLoadAverage(double loadAverage) { |
||||||
|
this.loadAverage = loadAverage; |
||||||
|
} |
||||||
|
|
||||||
|
public double getAvailablePhysicalMemorySize() { |
||||||
|
return availablePhysicalMemorySize; |
||||||
|
} |
||||||
|
|
||||||
|
public void setAvailablePhysicalMemorySize(double availablePhysicalMemorySize) { |
||||||
|
this.availablePhysicalMemorySize = availablePhysicalMemorySize; |
||||||
|
} |
||||||
|
|
||||||
|
public double getMaxCpuloadAvg() { |
||||||
|
return maxCpuloadAvg; |
||||||
|
} |
||||||
|
|
||||||
|
public void setMaxCpuloadAvg(double maxCpuloadAvg) { |
||||||
|
this.maxCpuloadAvg = maxCpuloadAvg; |
||||||
|
} |
||||||
|
|
||||||
|
public double getReservedMemory() { |
||||||
|
return reservedMemory; |
||||||
|
} |
||||||
|
|
||||||
|
public void setReservedMemory(double reservedMemory) { |
||||||
|
this.reservedMemory = reservedMemory; |
||||||
|
} |
||||||
|
|
||||||
|
public int getServerStatus() { |
||||||
|
return serverStatus; |
||||||
|
} |
||||||
|
|
||||||
|
public void setServerStatus(int serverStatus) { |
||||||
|
this.serverStatus = serverStatus; |
||||||
|
} |
||||||
|
|
||||||
|
public int getProcessId() { |
||||||
|
return processId; |
||||||
|
} |
||||||
|
|
||||||
|
public void setProcessId(int processId) { |
||||||
|
this.processId = processId; |
||||||
|
} |
||||||
|
|
||||||
|
public int getWorkerHostWeight() { |
||||||
|
return workerHostWeight; |
||||||
|
} |
||||||
|
|
||||||
|
public void setWorkerHostWeight(int workerHostWeight) { |
||||||
|
this.workerHostWeight = workerHostWeight; |
||||||
|
} |
||||||
|
|
||||||
|
public int getWorkerWaitingTaskCount() { |
||||||
|
return workerWaitingTaskCount; |
||||||
|
} |
||||||
|
|
||||||
|
public void setWorkerWaitingTaskCount(int workerWaitingTaskCount) { |
||||||
|
this.workerWaitingTaskCount = workerWaitingTaskCount; |
||||||
|
} |
||||||
|
|
||||||
|
public int getWorkerExecThreadCount() { |
||||||
|
return workerExecThreadCount; |
||||||
|
} |
||||||
|
|
||||||
|
public void setWorkerExecThreadCount(int workerExecThreadCount) { |
||||||
|
this.workerExecThreadCount = workerExecThreadCount; |
||||||
|
} |
||||||
|
|
||||||
|
public HeartBeat() { |
||||||
|
this.reportTime = System.currentTimeMillis(); |
||||||
|
this.serverStatus = Constants.NORMAL_NODE_STATUS; |
||||||
|
} |
||||||
|
|
||||||
|
public HeartBeat(long startupTime, double maxCpuloadAvg, double reservedMemory) { |
||||||
|
this.reportTime = System.currentTimeMillis(); |
||||||
|
this.serverStatus = Constants.NORMAL_NODE_STATUS; |
||||||
|
this.startupTime = startupTime; |
||||||
|
this.maxCpuloadAvg = maxCpuloadAvg; |
||||||
|
this.reservedMemory = reservedMemory; |
||||||
|
} |
||||||
|
|
||||||
|
public HeartBeat(long startupTime, double maxCpuloadAvg, double reservedMemory, int hostWeight, int workerExecThreadCount) { |
||||||
|
this.reportTime = System.currentTimeMillis(); |
||||||
|
this.serverStatus = Constants.NORMAL_NODE_STATUS; |
||||||
|
this.startupTime = startupTime; |
||||||
|
this.maxCpuloadAvg = maxCpuloadAvg; |
||||||
|
this.reservedMemory = reservedMemory; |
||||||
|
this.workerHostWeight = hostWeight; |
||||||
|
this.workerExecThreadCount = workerExecThreadCount; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* fill system info |
||||||
|
*/ |
||||||
|
private void fillSystemInfo() { |
||||||
|
this.cpuUsage = OSUtils.cpuUsage(); |
||||||
|
this.loadAverage = OSUtils.loadAverage(); |
||||||
|
this.availablePhysicalMemorySize = OSUtils.availablePhysicalMemorySize(); |
||||||
|
this.memoryUsage = OSUtils.memoryUsage(); |
||||||
|
this.processId = OSUtils.getProcessID(); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* update server state |
||||||
|
*/ |
||||||
|
public void updateServerState() { |
||||||
|
if (loadAverage > maxCpuloadAvg || availablePhysicalMemorySize < reservedMemory) { |
||||||
|
logger.warn("current cpu load average {} is too high or available memory {}G is too low, under max.cpuload.avg={} and reserved.memory={}G", |
||||||
|
loadAverage, availablePhysicalMemorySize, maxCpuloadAvg, reservedMemory); |
||||||
|
this.serverStatus = Constants.ABNORMAL_NODE_STATUS; |
||||||
|
} else if (workerWaitingTaskCount > workerExecThreadCount) { |
||||||
|
logger.warn("current waiting task count {} is large than worker thread count {}, worker is busy", workerWaitingTaskCount, workerExecThreadCount); |
||||||
|
this.serverStatus = Constants.BUSY_NODE_STATUE; |
||||||
|
} else { |
||||||
|
this.serverStatus = Constants.NORMAL_NODE_STATUS; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* encode heartbeat |
||||||
|
*/ |
||||||
|
public String encodeHeartBeat() { |
||||||
|
this.fillSystemInfo(); |
||||||
|
this.updateServerState(); |
||||||
|
|
||||||
|
StringBuilder builder = new StringBuilder(100); |
||||||
|
builder.append(cpuUsage).append(COMMA); |
||||||
|
builder.append(memoryUsage).append(COMMA); |
||||||
|
builder.append(loadAverage).append(COMMA); |
||||||
|
builder.append(availablePhysicalMemorySize).append(Constants.COMMA); |
||||||
|
builder.append(maxCpuloadAvg).append(Constants.COMMA); |
||||||
|
builder.append(reservedMemory).append(Constants.COMMA); |
||||||
|
builder.append(startupTime).append(Constants.COMMA); |
||||||
|
builder.append(reportTime).append(Constants.COMMA); |
||||||
|
builder.append(serverStatus).append(COMMA); |
||||||
|
builder.append(processId).append(COMMA); |
||||||
|
builder.append(workerHostWeight).append(COMMA); |
||||||
|
builder.append(workerExecThreadCount).append(COMMA); |
||||||
|
builder.append(workerWaitingTaskCount); |
||||||
|
|
||||||
|
return builder.toString(); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* decode heartbeat |
||||||
|
*/ |
||||||
|
public static HeartBeat decodeHeartBeat(String heartBeatInfo) { |
||||||
|
String[] parts = heartBeatInfo.split(Constants.COMMA); |
||||||
|
if (parts.length != Constants.HEARTBEAT_FOR_ZOOKEEPER_INFO_LENGTH) { |
||||||
|
return null; |
||||||
|
} |
||||||
|
HeartBeat heartBeat = new HeartBeat(); |
||||||
|
heartBeat.cpuUsage = Double.parseDouble(parts[0]); |
||||||
|
heartBeat.memoryUsage = Double.parseDouble(parts[1]); |
||||||
|
heartBeat.loadAverage = Double.parseDouble(parts[2]); |
||||||
|
heartBeat.availablePhysicalMemorySize = Double.parseDouble(parts[3]); |
||||||
|
heartBeat.maxCpuloadAvg = Double.parseDouble(parts[4]); |
||||||
|
heartBeat.reservedMemory = Double.parseDouble(parts[5]); |
||||||
|
heartBeat.startupTime = Long.parseLong(parts[6]); |
||||||
|
heartBeat.reportTime = Long.parseLong(parts[7]); |
||||||
|
heartBeat.serverStatus = Integer.parseInt(parts[8]); |
||||||
|
heartBeat.processId = Integer.parseInt(parts[9]); |
||||||
|
heartBeat.workerHostWeight = Integer.parseInt(parts[10]); |
||||||
|
heartBeat.workerExecThreadCount = Integer.parseInt(parts[11]); |
||||||
|
heartBeat.workerWaitingTaskCount = Integer.parseInt(parts[12]); |
||||||
|
return heartBeat; |
||||||
|
} |
||||||
|
} |
@ -1,135 +0,0 @@ |
|||||||
/* |
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
|
||||||
* contributor license agreements. See the NOTICE file distributed with |
|
||||||
* this work for additional information regarding copyright ownership. |
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
|
||||||
* (the "License"); you may not use this file except in compliance with |
|
||||||
* the License. You may obtain a copy of the License at |
|
||||||
* |
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
* |
|
||||||
* Unless required by applicable law or agreed to in writing, software |
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS, |
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
||||||
* See the License for the specific language governing permissions and |
|
||||||
* limitations under the License. |
|
||||||
*/ |
|
||||||
|
|
||||||
package org.apache.dolphinscheduler.common.utils; |
|
||||||
|
|
||||||
import org.apache.dolphinscheduler.common.Constants; |
|
||||||
import org.apache.dolphinscheduler.common.model.Server; |
|
||||||
|
|
||||||
import org.apache.commons.lang.StringUtils; |
|
||||||
|
|
||||||
/** |
|
||||||
* heartbeat for ZK reigster res info |
|
||||||
*/ |
|
||||||
public class ResInfo { |
|
||||||
|
|
||||||
/** |
|
||||||
* cpuUsage |
|
||||||
*/ |
|
||||||
private double cpuUsage; |
|
||||||
|
|
||||||
/** |
|
||||||
* memoryUsage |
|
||||||
*/ |
|
||||||
private double memoryUsage; |
|
||||||
|
|
||||||
/** |
|
||||||
* loadAverage |
|
||||||
*/ |
|
||||||
private double loadAverage; |
|
||||||
|
|
||||||
public ResInfo(double cpuUsage, double memoryUsage) { |
|
||||||
this.cpuUsage = cpuUsage; |
|
||||||
this.memoryUsage = memoryUsage; |
|
||||||
} |
|
||||||
|
|
||||||
public ResInfo(double cpuUsage, double memoryUsage, double loadAverage) { |
|
||||||
this(cpuUsage,memoryUsage); |
|
||||||
this.loadAverage = loadAverage; |
|
||||||
} |
|
||||||
|
|
||||||
public double getCpuUsage() { |
|
||||||
return cpuUsage; |
|
||||||
} |
|
||||||
|
|
||||||
public void setCpuUsage(double cpuUsage) { |
|
||||||
this.cpuUsage = cpuUsage; |
|
||||||
} |
|
||||||
|
|
||||||
public double getMemoryUsage() { |
|
||||||
return memoryUsage; |
|
||||||
} |
|
||||||
|
|
||||||
public void setMemoryUsage(double memoryUsage) { |
|
||||||
this.memoryUsage = memoryUsage; |
|
||||||
} |
|
||||||
|
|
||||||
public double getLoadAverage() { |
|
||||||
return loadAverage; |
|
||||||
} |
|
||||||
|
|
||||||
public void setLoadAverage(double loadAverage) { |
|
||||||
this.loadAverage = loadAverage; |
|
||||||
} |
|
||||||
|
|
||||||
/** |
|
||||||
* get CPU and memory usage |
|
||||||
* @param cpuUsage cpu usage |
|
||||||
* @param memoryUsage memory usage |
|
||||||
* @param loadAverage load average |
|
||||||
* @return cpu and memory usage |
|
||||||
*/ |
|
||||||
public static String getResInfoJson(double cpuUsage, double memoryUsage, double loadAverage) { |
|
||||||
ResInfo resInfo = new ResInfo(cpuUsage,memoryUsage,loadAverage); |
|
||||||
return JSONUtils.toJsonString(resInfo); |
|
||||||
} |
|
||||||
|
|
||||||
/** |
|
||||||
* parse heartbeat info for zk |
|
||||||
* @param heartBeatInfo heartbeat info |
|
||||||
* @return heartbeat info to Server |
|
||||||
*/ |
|
||||||
public static Server parseHeartbeatForRegistryInfo(String heartBeatInfo) { |
|
||||||
if (!isValidHeartbeatForRegistryInfo(heartBeatInfo)) { |
|
||||||
return null; |
|
||||||
} |
|
||||||
String[] parts = heartBeatInfo.split(Constants.COMMA); |
|
||||||
Server server = new Server(); |
|
||||||
server.setResInfo(getResInfoJson(Double.parseDouble(parts[0]), |
|
||||||
Double.parseDouble(parts[1]), |
|
||||||
Double.parseDouble(parts[2]))); |
|
||||||
server.setCreateTime(DateUtils.stringToDate(parts[6])); |
|
||||||
server.setLastHeartbeatTime(DateUtils.stringToDate(parts[7])); |
|
||||||
//set process id
|
|
||||||
server.setId(Integer.parseInt(parts[9])); |
|
||||||
return server; |
|
||||||
} |
|
||||||
|
|
||||||
/** |
|
||||||
* is valid heartbeat info for zk |
|
||||||
* @param heartBeatInfo heartbeat info |
|
||||||
* @return heartbeat info is valid |
|
||||||
*/ |
|
||||||
public static boolean isValidHeartbeatForRegistryInfo(String heartBeatInfo) { |
|
||||||
if (!StringUtils.isEmpty(heartBeatInfo)) { |
|
||||||
String[] parts = heartBeatInfo.split(Constants.COMMA); |
|
||||||
return parts.length == Constants.HEARTBEAT_FOR_ZOOKEEPER_INFO_LENGTH |
|
||||||
|| parts.length == Constants.HEARTBEAT_WITH_WEIGHT_FOR_ZOOKEEPER_INFO_LENGTH; |
|
||||||
} |
|
||||||
return false; |
|
||||||
} |
|
||||||
|
|
||||||
/** |
|
||||||
* is new heartbeat info for zk with weight |
|
||||||
* @param parts heartbeat info parts |
|
||||||
* @return heartbeat info is new with weight |
|
||||||
*/ |
|
||||||
public static boolean isNewHeartbeatWithWeight(String[] parts) { |
|
||||||
return parts.length == Constants.HEARTBEAT_WITH_WEIGHT_FOR_ZOOKEEPER_INFO_LENGTH; |
|
||||||
} |
|
||||||
|
|
||||||
} |
|
@ -0,0 +1,76 @@ |
|||||||
|
/* |
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||||
|
* contributor license agreements. See the NOTICE file distributed with |
||||||
|
* this work for additional information regarding copyright ownership. |
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||||
|
* (the "License"); you may not use this file except in compliance with |
||||||
|
* the License. You may obtain a copy of the License at |
||||||
|
* |
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* |
||||||
|
* Unless required by applicable law or agreed to in writing, software |
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
* See the License for the specific language governing permissions and |
||||||
|
* limitations under the License. |
||||||
|
*/ |
||||||
|
|
||||||
|
package org.apache.dolphinscheduler.common.utils; |
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals; |
||||||
|
|
||||||
|
import org.apache.dolphinscheduler.common.Constants; |
||||||
|
|
||||||
|
import org.junit.Test; |
||||||
|
|
||||||
|
/** |
||||||
|
* NetUtilsTest |
||||||
|
*/ |
||||||
|
public class HeartBeatTest { |
||||||
|
|
||||||
|
@Test |
||||||
|
public void testAbnormalState() { |
||||||
|
long startupTime = System.currentTimeMillis(); |
||||||
|
double loadAverage = 100; |
||||||
|
double reservedMemory = 100; |
||||||
|
HeartBeat heartBeat = new HeartBeat(startupTime, loadAverage, reservedMemory); |
||||||
|
heartBeat.updateServerState(); |
||||||
|
assertEquals(Constants.ABNORMAL_NODE_STATUS, heartBeat.getServerStatus()); |
||||||
|
} |
||||||
|
|
||||||
|
@Test |
||||||
|
public void testBusyState() { |
||||||
|
long startupTime = System.currentTimeMillis(); |
||||||
|
double loadAverage = 0; |
||||||
|
double reservedMemory = 0; |
||||||
|
int hostWeight = 1; |
||||||
|
int taskCount = 200; |
||||||
|
int workerThreadCount = 199; |
||||||
|
HeartBeat heartBeat = new HeartBeat(startupTime, loadAverage, reservedMemory, hostWeight, workerThreadCount); |
||||||
|
|
||||||
|
heartBeat.setWorkerWaitingTaskCount(taskCount); |
||||||
|
heartBeat.updateServerState(); |
||||||
|
assertEquals(Constants.BUSY_NODE_STATUE, heartBeat.getServerStatus()); |
||||||
|
} |
||||||
|
|
||||||
|
@Test |
||||||
|
public void testDecodeHeartBeat() throws Exception { |
||||||
|
String heartBeatInfo = "0.35,0.58,3.09,6.47,5.0,1.0,1634033006749,1634033006857,1,29732,1,199,200"; |
||||||
|
HeartBeat heartBeat = HeartBeat.decodeHeartBeat(heartBeatInfo); |
||||||
|
|
||||||
|
double delta = 0.001; |
||||||
|
assertEquals(0.35, heartBeat.getCpuUsage(), delta); |
||||||
|
assertEquals(0.58, heartBeat.getMemoryUsage(), delta); |
||||||
|
assertEquals(3.09, heartBeat.getLoadAverage(), delta); |
||||||
|
assertEquals(6.47, heartBeat.getAvailablePhysicalMemorySize(), delta); |
||||||
|
assertEquals(5.0, heartBeat.getMaxCpuloadAvg(), delta); |
||||||
|
assertEquals(1.0, heartBeat.getReservedMemory(), delta); |
||||||
|
assertEquals(1634033006749L, heartBeat.getStartupTime()); |
||||||
|
assertEquals(1634033006857L, heartBeat.getReportTime()); |
||||||
|
assertEquals(1, heartBeat.getServerStatus()); |
||||||
|
assertEquals(29732, heartBeat.getProcessId()); |
||||||
|
assertEquals(199, heartBeat.getWorkerExecThreadCount()); |
||||||
|
assertEquals(200, heartBeat.getWorkerWaitingTaskCount()); |
||||||
|
} |
||||||
|
|
||||||
|
} |
Loading…
Reference in new issue