Browse Source

add monitorServerState

pull/2/head
ligang 6 years ago
parent
commit
5739eb0de2
  1. 9
      install.sh
  2. 42
      script/monitor_server.py

9
install.sh

@ -98,6 +98,8 @@ xlsFilePath="/tmp/xls"
# Set to false to not start it; when false, the settings below do not need to be changed
hdfsStartupSate="false"
# Whether to launch the self-start script
monitorServerState="true"
# NameNode address; HA is supported, which requires placing core-site.xml and hdfs-site.xml in the conf directory
namenodeFs="hdfs://mycluster:8020"
@ -364,3 +366,10 @@ fi
# 6. Start all services
echo "6,启动"
sh "${workDir}/script/start_all.sh"

# 7. Start the monitor script in the background when monitorServerState is "true".
# Every expansion is quoted: an empty monitorServerState would otherwise make the
# test a syntax error, and an empty or space-containing argument would shift the
# positional parameters that monitor_server.py reads
# (install path, zk quorum, masters zk path, workers zk path).
if [ "true" = "$monitorServerState" ]; then
    echo 'start monitor server'
    nohup python -u "${workDir}/script/monitor_server.py" "$installPath" "$zkQuorum" "$zkMasters" "$zkWorkers" > "${workDir}/monitor_server.log" 2>&1 &
fi

42
script/monitor_server.py

@ -10,9 +10,13 @@ pip install kazoo 安装
conda install -c conda-forge kazoo 安装
运行脚本
nohup python -u monitor_server.py > nohup.out 2>&1 &
/data1_1T/escheduler的值来自install.sh中的installPath
192.168.xx.xx:2181,192.168.xx.xx:2181,192.168.xx.xx:2181的值来自install.sh中的zkQuorum
/escheduler/masters的值来自install.sh中的zkMasters
/escheduler/workers的值来自install.sh中的zkWorkers
nohup python -u monitor_server.py /data1_1T/escheduler 192.168.xx.xx:2181,192.168.xx.xx:2181,192.168.xx.xx:2181 /escheduler/masters /escheduler/workers > nohup.out 2>&1 &
'''
import sys
import socket
import os
import sched
@ -20,14 +24,17 @@ import time
from datetime import datetime
from kazoo.client import KazooClient
schedule = sched.scheduler(time.time, time.sleep)
class ZkClient:
def __init__(self):
    """Open a ZooKeeper connection to the hosts named by the global ``zookeepers``.

    ``zookeepers`` is the module-level string assigned from ``sys.argv[2]``
    in the ``__main__`` section. Only one client is created; the earlier
    hard-coded host list is gone so no extra connection is opened and then
    abandoned.
    """
    # hosts: address list of the ZooKeeper cluster
    print(zookeepers)
    self.zk = KazooClient(hosts=zookeepers)
    print("ready start")
    self.zk.start()
# 读取配置文件,组装成字典
def read_file(self,path):
@ -45,35 +52,37 @@ class ZkClient:
# 重启服务
def restart_server(self, inc):
    """Start any configured master/worker missing from ZooKeeper, then reschedule.

    Reads the ``masters`` and ``workers`` entries (comma-separated) from
    ``<install_path>/conf/config/run_config.conf``, maps each entry through
    ``self.get_ip_by_hostname`` and compares the result with the children
    registered under ``masters_zk_path`` / ``workers_zk_path`` (the part of
    each child name before the first ``_``). For every configured host with
    no registered child, the matching daemon is started over ssh.

    Relies on the module-level ``install_path``, ``masters_zk_path``,
    ``workers_zk_path`` and ``schedule`` set up elsewhere in the file.

    :param inc: delay passed to ``schedule.enter`` before the next run.
    """
    config_dict = self.read_file(install_path + '/conf/config/run_config.conf')

    master_list = config_dict.get('masters').split(',')
    print(master_list)
    master_list = [self.get_ip_by_hostname(item) for item in master_list]

    worker_list = config_dict.get('workers').split(',')
    print(worker_list)
    worker_list = [self.get_ip_by_hostname(item) for item in worker_list]

    if self.zk.exists(masters_zk_path):
        # Child names are split on '_' and only the leading part is compared
        # with the resolved addresses from the config file.
        zk_master_list = [
            node.split('_')[0] for node in self.zk.get_children(masters_zk_path)
        ]
        restart_master_list = list(set(master_list) - set(zk_master_list))
        for master in restart_master_list:
            print("master " + self.get_ip_by_hostname(master) + " 服务已经掉了")
            os.system('ssh ' + self.get_ip_by_hostname(master) + ' sh ' + install_path + '/bin/escheduler-daemon.sh start master-server')

    if self.zk.exists(workers_zk_path):
        zk_worker_list = [
            node.split('_')[0] for node in self.zk.get_children(workers_zk_path)
        ]
        restart_worker_list = list(set(worker_list) - set(zk_worker_list))
        for worker in restart_worker_list:
            print("worker " + self.get_ip_by_hostname(worker) + " 服务已经掉了")
            os.system('ssh ' + self.get_ip_by_hostname(worker) + ' sh ' + install_path + '/bin/escheduler-daemon.sh start worker-server')

    print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    # Re-arm the scheduler so this check keeps running every `inc`.
    schedule.enter(inc, 0, self.restart_server, (inc,))
@ -84,5 +93,12 @@ class ZkClient:
schedule.enter(0, 0, self.restart_server, (inc,))
schedule.run()
if __name__ == '__main__':
    # Four positional arguments are read below (argv[1]..argv[4]), so argv
    # must hold at least five entries including the script name.
    if len(sys.argv) < 5:
        print('please input install_path,zookeepers,masters_zk_path and worker_zk_path')
        # Stop here instead of failing later with an IndexError.
        sys.exit(1)

    # Module-level names read by ZkClient's methods.
    install_path = sys.argv[1]
    zookeepers = sys.argv[2]
    masters_zk_path = sys.argv[3]
    workers_zk_path = sys.argv[4]

    zkClient = ZkClient()
    # 300 is handed to main() as the re-check interval.
    zkClient.main(300)

Loading…
Cancel
Save