Browse Source
* [DS-6499][WorkerServer] report busy state when worker is overloaded
3.0.0/version-upgrade
wind
3 years ago
committed by
GitHub
13 changed files with 432 additions and 243 deletions
@ -0,0 +1,248 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package org.apache.dolphinscheduler.common.utils; |
||||
|
||||
import org.apache.dolphinscheduler.common.Constants; |
||||
|
||||
import org.slf4j.Logger; |
||||
import org.slf4j.LoggerFactory; |
||||
|
||||
public class HeartBeat { |
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(HeartBeat.class); |
||||
public static final String COMMA = ","; |
||||
|
||||
private long startupTime; |
||||
private long reportTime; |
||||
private double cpuUsage; |
||||
private double memoryUsage; |
||||
private double loadAverage; |
||||
private double availablePhysicalMemorySize; |
||||
private double maxCpuloadAvg; |
||||
private double reservedMemory; |
||||
private int serverStatus; |
||||
private int processId; |
||||
|
||||
private int workerHostWeight; // worker host weight
|
||||
private int workerWaitingTaskCount; // worker waiting task count
|
||||
private int workerExecThreadCount; // worker thread pool thread count
|
||||
|
||||
public long getStartupTime() { |
||||
return startupTime; |
||||
} |
||||
|
||||
public void setStartupTime(long startupTime) { |
||||
this.startupTime = startupTime; |
||||
} |
||||
|
||||
public long getReportTime() { |
||||
return reportTime; |
||||
} |
||||
|
||||
public void setReportTime(long reportTime) { |
||||
this.reportTime = reportTime; |
||||
} |
||||
|
||||
public double getCpuUsage() { |
||||
return cpuUsage; |
||||
} |
||||
|
||||
public void setCpuUsage(double cpuUsage) { |
||||
this.cpuUsage = cpuUsage; |
||||
} |
||||
|
||||
public double getMemoryUsage() { |
||||
return memoryUsage; |
||||
} |
||||
|
||||
public void setMemoryUsage(double memoryUsage) { |
||||
this.memoryUsage = memoryUsage; |
||||
} |
||||
|
||||
public double getLoadAverage() { |
||||
return loadAverage; |
||||
} |
||||
|
||||
public void setLoadAverage(double loadAverage) { |
||||
this.loadAverage = loadAverage; |
||||
} |
||||
|
||||
public double getAvailablePhysicalMemorySize() { |
||||
return availablePhysicalMemorySize; |
||||
} |
||||
|
||||
public void setAvailablePhysicalMemorySize(double availablePhysicalMemorySize) { |
||||
this.availablePhysicalMemorySize = availablePhysicalMemorySize; |
||||
} |
||||
|
||||
public double getMaxCpuloadAvg() { |
||||
return maxCpuloadAvg; |
||||
} |
||||
|
||||
public void setMaxCpuloadAvg(double maxCpuloadAvg) { |
||||
this.maxCpuloadAvg = maxCpuloadAvg; |
||||
} |
||||
|
||||
public double getReservedMemory() { |
||||
return reservedMemory; |
||||
} |
||||
|
||||
public void setReservedMemory(double reservedMemory) { |
||||
this.reservedMemory = reservedMemory; |
||||
} |
||||
|
||||
public int getServerStatus() { |
||||
return serverStatus; |
||||
} |
||||
|
||||
public void setServerStatus(int serverStatus) { |
||||
this.serverStatus = serverStatus; |
||||
} |
||||
|
||||
public int getProcessId() { |
||||
return processId; |
||||
} |
||||
|
||||
public void setProcessId(int processId) { |
||||
this.processId = processId; |
||||
} |
||||
|
||||
public int getWorkerHostWeight() { |
||||
return workerHostWeight; |
||||
} |
||||
|
||||
public void setWorkerHostWeight(int workerHostWeight) { |
||||
this.workerHostWeight = workerHostWeight; |
||||
} |
||||
|
||||
public int getWorkerWaitingTaskCount() { |
||||
return workerWaitingTaskCount; |
||||
} |
||||
|
||||
public void setWorkerWaitingTaskCount(int workerWaitingTaskCount) { |
||||
this.workerWaitingTaskCount = workerWaitingTaskCount; |
||||
} |
||||
|
||||
public int getWorkerExecThreadCount() { |
||||
return workerExecThreadCount; |
||||
} |
||||
|
||||
public void setWorkerExecThreadCount(int workerExecThreadCount) { |
||||
this.workerExecThreadCount = workerExecThreadCount; |
||||
} |
||||
|
||||
public HeartBeat() { |
||||
this.reportTime = System.currentTimeMillis(); |
||||
this.serverStatus = Constants.NORMAL_NODE_STATUS; |
||||
} |
||||
|
||||
public HeartBeat(long startupTime, double maxCpuloadAvg, double reservedMemory) { |
||||
this.reportTime = System.currentTimeMillis(); |
||||
this.serverStatus = Constants.NORMAL_NODE_STATUS; |
||||
this.startupTime = startupTime; |
||||
this.maxCpuloadAvg = maxCpuloadAvg; |
||||
this.reservedMemory = reservedMemory; |
||||
} |
||||
|
||||
public HeartBeat(long startupTime, double maxCpuloadAvg, double reservedMemory, int hostWeight, int workerExecThreadCount) { |
||||
this.reportTime = System.currentTimeMillis(); |
||||
this.serverStatus = Constants.NORMAL_NODE_STATUS; |
||||
this.startupTime = startupTime; |
||||
this.maxCpuloadAvg = maxCpuloadAvg; |
||||
this.reservedMemory = reservedMemory; |
||||
this.workerHostWeight = hostWeight; |
||||
this.workerExecThreadCount = workerExecThreadCount; |
||||
} |
||||
|
||||
/** |
||||
* fill system info |
||||
*/ |
||||
private void fillSystemInfo() { |
||||
this.cpuUsage = OSUtils.cpuUsage(); |
||||
this.loadAverage = OSUtils.loadAverage(); |
||||
this.availablePhysicalMemorySize = OSUtils.availablePhysicalMemorySize(); |
||||
this.memoryUsage = OSUtils.memoryUsage(); |
||||
this.processId = OSUtils.getProcessID(); |
||||
} |
||||
|
||||
/** |
||||
* update server state |
||||
*/ |
||||
public void updateServerState() { |
||||
if (loadAverage > maxCpuloadAvg || availablePhysicalMemorySize < reservedMemory) { |
||||
logger.warn("current cpu load average {} is too high or available memory {}G is too low, under max.cpuload.avg={} and reserved.memory={}G", |
||||
loadAverage, availablePhysicalMemorySize, maxCpuloadAvg, reservedMemory); |
||||
this.serverStatus = Constants.ABNORMAL_NODE_STATUS; |
||||
} else if (workerWaitingTaskCount > workerExecThreadCount) { |
||||
logger.warn("current waiting task count {} is large than worker thread count {}, worker is busy", workerWaitingTaskCount, workerExecThreadCount); |
||||
this.serverStatus = Constants.BUSY_NODE_STATUE; |
||||
} else { |
||||
this.serverStatus = Constants.NORMAL_NODE_STATUS; |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* encode heartbeat |
||||
*/ |
||||
public String encodeHeartBeat() { |
||||
this.fillSystemInfo(); |
||||
this.updateServerState(); |
||||
|
||||
StringBuilder builder = new StringBuilder(100); |
||||
builder.append(cpuUsage).append(COMMA); |
||||
builder.append(memoryUsage).append(COMMA); |
||||
builder.append(loadAverage).append(COMMA); |
||||
builder.append(availablePhysicalMemorySize).append(Constants.COMMA); |
||||
builder.append(maxCpuloadAvg).append(Constants.COMMA); |
||||
builder.append(reservedMemory).append(Constants.COMMA); |
||||
builder.append(startupTime).append(Constants.COMMA); |
||||
builder.append(reportTime).append(Constants.COMMA); |
||||
builder.append(serverStatus).append(COMMA); |
||||
builder.append(processId).append(COMMA); |
||||
builder.append(workerHostWeight).append(COMMA); |
||||
builder.append(workerExecThreadCount).append(COMMA); |
||||
builder.append(workerWaitingTaskCount); |
||||
|
||||
return builder.toString(); |
||||
} |
||||
|
||||
/** |
||||
* decode heartbeat |
||||
*/ |
||||
public static HeartBeat decodeHeartBeat(String heartBeatInfo) { |
||||
String[] parts = heartBeatInfo.split(Constants.COMMA); |
||||
if (parts.length != Constants.HEARTBEAT_FOR_ZOOKEEPER_INFO_LENGTH) { |
||||
return null; |
||||
} |
||||
HeartBeat heartBeat = new HeartBeat(); |
||||
heartBeat.cpuUsage = Double.parseDouble(parts[0]); |
||||
heartBeat.memoryUsage = Double.parseDouble(parts[1]); |
||||
heartBeat.loadAverage = Double.parseDouble(parts[2]); |
||||
heartBeat.availablePhysicalMemorySize = Double.parseDouble(parts[3]); |
||||
heartBeat.maxCpuloadAvg = Double.parseDouble(parts[4]); |
||||
heartBeat.reservedMemory = Double.parseDouble(parts[5]); |
||||
heartBeat.startupTime = Long.parseLong(parts[6]); |
||||
heartBeat.reportTime = Long.parseLong(parts[7]); |
||||
heartBeat.serverStatus = Integer.parseInt(parts[8]); |
||||
heartBeat.processId = Integer.parseInt(parts[9]); |
||||
heartBeat.workerHostWeight = Integer.parseInt(parts[10]); |
||||
heartBeat.workerExecThreadCount = Integer.parseInt(parts[11]); |
||||
heartBeat.workerWaitingTaskCount = Integer.parseInt(parts[12]); |
||||
return heartBeat; |
||||
} |
||||
} |
@ -1,135 +0,0 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package org.apache.dolphinscheduler.common.utils; |
||||
|
||||
import org.apache.dolphinscheduler.common.Constants; |
||||
import org.apache.dolphinscheduler.common.model.Server; |
||||
|
||||
import org.apache.commons.lang.StringUtils; |
||||
|
||||
/** |
||||
* heartbeat for ZK reigster res info |
||||
*/ |
||||
public class ResInfo { |
||||
|
||||
/** |
||||
* cpuUsage |
||||
*/ |
||||
private double cpuUsage; |
||||
|
||||
/** |
||||
* memoryUsage |
||||
*/ |
||||
private double memoryUsage; |
||||
|
||||
/** |
||||
* loadAverage |
||||
*/ |
||||
private double loadAverage; |
||||
|
||||
public ResInfo(double cpuUsage, double memoryUsage) { |
||||
this.cpuUsage = cpuUsage; |
||||
this.memoryUsage = memoryUsage; |
||||
} |
||||
|
||||
public ResInfo(double cpuUsage, double memoryUsage, double loadAverage) { |
||||
this(cpuUsage,memoryUsage); |
||||
this.loadAverage = loadAverage; |
||||
} |
||||
|
||||
public double getCpuUsage() { |
||||
return cpuUsage; |
||||
} |
||||
|
||||
public void setCpuUsage(double cpuUsage) { |
||||
this.cpuUsage = cpuUsage; |
||||
} |
||||
|
||||
public double getMemoryUsage() { |
||||
return memoryUsage; |
||||
} |
||||
|
||||
public void setMemoryUsage(double memoryUsage) { |
||||
this.memoryUsage = memoryUsage; |
||||
} |
||||
|
||||
public double getLoadAverage() { |
||||
return loadAverage; |
||||
} |
||||
|
||||
public void setLoadAverage(double loadAverage) { |
||||
this.loadAverage = loadAverage; |
||||
} |
||||
|
||||
/** |
||||
* get CPU and memory usage |
||||
* @param cpuUsage cpu usage |
||||
* @param memoryUsage memory usage |
||||
* @param loadAverage load average |
||||
* @return cpu and memory usage |
||||
*/ |
||||
public static String getResInfoJson(double cpuUsage, double memoryUsage, double loadAverage) { |
||||
ResInfo resInfo = new ResInfo(cpuUsage,memoryUsage,loadAverage); |
||||
return JSONUtils.toJsonString(resInfo); |
||||
} |
||||
|
||||
/** |
||||
* parse heartbeat info for zk |
||||
* @param heartBeatInfo heartbeat info |
||||
* @return heartbeat info to Server |
||||
*/ |
||||
public static Server parseHeartbeatForRegistryInfo(String heartBeatInfo) { |
||||
if (!isValidHeartbeatForRegistryInfo(heartBeatInfo)) { |
||||
return null; |
||||
} |
||||
String[] parts = heartBeatInfo.split(Constants.COMMA); |
||||
Server server = new Server(); |
||||
server.setResInfo(getResInfoJson(Double.parseDouble(parts[0]), |
||||
Double.parseDouble(parts[1]), |
||||
Double.parseDouble(parts[2]))); |
||||
server.setCreateTime(DateUtils.stringToDate(parts[6])); |
||||
server.setLastHeartbeatTime(DateUtils.stringToDate(parts[7])); |
||||
//set process id
|
||||
server.setId(Integer.parseInt(parts[9])); |
||||
return server; |
||||
} |
||||
|
||||
/** |
||||
* is valid heartbeat info for zk |
||||
* @param heartBeatInfo heartbeat info |
||||
* @return heartbeat info is valid |
||||
*/ |
||||
public static boolean isValidHeartbeatForRegistryInfo(String heartBeatInfo) { |
||||
if (!StringUtils.isEmpty(heartBeatInfo)) { |
||||
String[] parts = heartBeatInfo.split(Constants.COMMA); |
||||
return parts.length == Constants.HEARTBEAT_FOR_ZOOKEEPER_INFO_LENGTH |
||||
|| parts.length == Constants.HEARTBEAT_WITH_WEIGHT_FOR_ZOOKEEPER_INFO_LENGTH; |
||||
} |
||||
return false; |
||||
} |
||||
|
||||
/** |
||||
* is new heartbeat info for zk with weight |
||||
* @param parts heartbeat info parts |
||||
* @return heartbeat info is new with weight |
||||
*/ |
||||
public static boolean isNewHeartbeatWithWeight(String[] parts) { |
||||
return parts.length == Constants.HEARTBEAT_WITH_WEIGHT_FOR_ZOOKEEPER_INFO_LENGTH; |
||||
} |
||||
|
||||
} |
@ -0,0 +1,76 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package org.apache.dolphinscheduler.common.utils; |
||||
|
||||
import static org.junit.Assert.assertEquals; |
||||
|
||||
import org.apache.dolphinscheduler.common.Constants; |
||||
|
||||
import org.junit.Test; |
||||
|
||||
/** |
||||
* NetUtilsTest |
||||
*/ |
||||
public class HeartBeatTest { |
||||
|
||||
@Test |
||||
public void testAbnormalState() { |
||||
long startupTime = System.currentTimeMillis(); |
||||
double loadAverage = 100; |
||||
double reservedMemory = 100; |
||||
HeartBeat heartBeat = new HeartBeat(startupTime, loadAverage, reservedMemory); |
||||
heartBeat.updateServerState(); |
||||
assertEquals(Constants.ABNORMAL_NODE_STATUS, heartBeat.getServerStatus()); |
||||
} |
||||
|
||||
@Test |
||||
public void testBusyState() { |
||||
long startupTime = System.currentTimeMillis(); |
||||
double loadAverage = 0; |
||||
double reservedMemory = 0; |
||||
int hostWeight = 1; |
||||
int taskCount = 200; |
||||
int workerThreadCount = 199; |
||||
HeartBeat heartBeat = new HeartBeat(startupTime, loadAverage, reservedMemory, hostWeight, workerThreadCount); |
||||
|
||||
heartBeat.setWorkerWaitingTaskCount(taskCount); |
||||
heartBeat.updateServerState(); |
||||
assertEquals(Constants.BUSY_NODE_STATUE, heartBeat.getServerStatus()); |
||||
} |
||||
|
||||
@Test |
||||
public void testDecodeHeartBeat() throws Exception { |
||||
String heartBeatInfo = "0.35,0.58,3.09,6.47,5.0,1.0,1634033006749,1634033006857,1,29732,1,199,200"; |
||||
HeartBeat heartBeat = HeartBeat.decodeHeartBeat(heartBeatInfo); |
||||
|
||||
double delta = 0.001; |
||||
assertEquals(0.35, heartBeat.getCpuUsage(), delta); |
||||
assertEquals(0.58, heartBeat.getMemoryUsage(), delta); |
||||
assertEquals(3.09, heartBeat.getLoadAverage(), delta); |
||||
assertEquals(6.47, heartBeat.getAvailablePhysicalMemorySize(), delta); |
||||
assertEquals(5.0, heartBeat.getMaxCpuloadAvg(), delta); |
||||
assertEquals(1.0, heartBeat.getReservedMemory(), delta); |
||||
assertEquals(1634033006749L, heartBeat.getStartupTime()); |
||||
assertEquals(1634033006857L, heartBeat.getReportTime()); |
||||
assertEquals(1, heartBeat.getServerStatus()); |
||||
assertEquals(29732, heartBeat.getProcessId()); |
||||
assertEquals(199, heartBeat.getWorkerExecThreadCount()); |
||||
assertEquals(200, heartBeat.getWorkerWaitingTaskCount()); |
||||
} |
||||
|
||||
} |
Loading…
Reference in new issue