|
|
@ -9,10 +9,15 @@ yum -y install python-pip |
|
|
|
pip install kazoo 安装 |
|
|
|
pip install kazoo 安装 |
|
|
|
conda install -c conda-forge kazoo 安装 |
|
|
|
conda install -c conda-forge kazoo 安装 |
|
|
|
|
|
|
|
|
|
|
|
运行脚本: |
|
|
|
运行脚本及参数说明: |
|
|
|
nohup python -u monitor_server.py > nohup.out 2>&1 & |
|
|
|
nohup python -u monitor_server.py /data1_1T/escheduler 192.168.xx.xx:2181,192.168.xx.xx:2181,192.168.xx.xx:2181 /escheduler/masters /escheduler/workers> monitor_server.log 2>&1 & |
|
|
|
|
|
|
|
参数说明如下: |
|
|
|
|
|
|
|
/data1_1T/escheduler的值来自install.sh中的installPath |
|
|
|
|
|
|
|
192.168.xx.xx:2181,192.168.xx.xx:2181,192.168.xx.xx:2181的值来自install.sh中的zkQuorum |
|
|
|
|
|
|
|
/escheduler/masters的值来自install.sh中的zkMasters |
|
|
|
|
|
|
|
/escheduler/workers的值来自install.sh中的zkWorkers |
|
|
|
''' |
|
|
|
''' |
|
|
|
|
|
|
|
import sys |
|
|
|
import socket |
|
|
|
import socket |
|
|
|
import os |
|
|
|
import os |
|
|
|
import sched |
|
|
|
import sched |
|
|
@ -20,14 +25,13 @@ import time |
|
|
|
from datetime import datetime |
|
|
|
from datetime import datetime |
|
|
|
from kazoo.client import KazooClient |
|
|
|
from kazoo.client import KazooClient |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
schedule = sched.scheduler(time.time, time.sleep) |
|
|
|
schedule = sched.scheduler(time.time, time.sleep) |
|
|
|
|
|
|
|
|
|
|
|
class ZkClient: |
|
|
|
class ZkClient: |
|
|
|
def __init__(self):
    """Create the Kazoo client for the ZooKeeper ensemble and start it.

    The connection string comes from the module-level ``zookeepers`` name,
    which the script entry point fills in from the command line.
    """
    # hosts is the comma-separated "host:port" list of the ZooKeeper cluster.
    zk_client = KazooClient(hosts=zookeepers)
    self.zk = zk_client
    zk_client.start()
|
|
|
|
|
|
|
|
|
|
|
# 读取配置文件,组装成字典 |
|
|
|
# 读取配置文件,组装成字典 |
|
|
|
def read_file(self,path): |
|
|
|
def read_file(self,path): |
|
|
@ -45,35 +49,37 @@ class ZkClient: |
|
|
|
|
|
|
|
|
|
|
|
# 重启服务 |
|
|
|
# 重启服务 |
|
|
|
def restart_server(self, inc):
    """Start any configured master/worker that is not registered in ZooKeeper.

    Reads ``conf/config/run_config.conf`` under the module-level
    ``install_path``, takes the comma-separated ``masters`` and ``workers``
    entries, converts each through ``get_ip_by_hostname`` and compares the
    result with the children of ``masters_zk_path`` / ``workers_zk_path``.
    Every configured address without a matching child node is started over
    ssh. Finally the current timestamp is printed and this method re-queues
    itself on the module-level ``schedule``.

    :param inc: delay handed to ``schedule.enter`` before the next check
        (seconds, since the scheduler is built on ``time.time``/``time.sleep``).
    """
    config_dict = self.read_file(install_path + '/conf/config/run_config.conf')

    # print(...) with a single argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    master_list = config_dict.get('masters').split(',')
    print(master_list)
    master_list = [self.get_ip_by_hostname(host) for host in master_list]

    worker_list = config_dict.get('workers').split(',')
    print(worker_list)
    worker_list = [self.get_ip_by_hostname(host) for host in worker_list]

    self._restart_missing(masters_zk_path, master_list, 'master')
    self._restart_missing(workers_zk_path, worker_list, 'worker')

    print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    # Priority 0; the same interval is passed on so the check keeps repeating.
    schedule.enter(inc, 0, self.restart_server, (inc,))

def _restart_missing(self, zk_path, expected_ips, role):
    """Start the ``role`` server on every expected address absent under ``zk_path``.

    :param zk_path: ZooKeeper node whose children are the registered servers.
    :param expected_ips: addresses that should be registered.
    :param role: ``'master'`` or ``'worker'``; used in the log line and to
        build the ``<role>-server`` argument of the daemon script.
    """
    # Nothing to compare against when the parent node does not exist.
    if not self.zk.exists(zk_path):
        return

    # Only the part of a child node name before the first '_' is compared
    # with the expected addresses.
    registered = set(node.split('_')[0] for node in self.zk.get_children(zk_path))

    for address in set(expected_ips) - registered:
        # Looked up once and reused for both the log line and the ssh target.
        target = self.get_ip_by_hostname(address)
        print(role + " " + target + " 服务已经掉了")
        # NOTE(review): the command is assembled by string concatenation and
        # run through the local shell, so target and install_path must be
        # trusted values.
        os.system('ssh ' + target + ' sh ' + install_path
                  + '/bin/escheduler-daemon.sh start ' + role + '-server')
|
|
@ -84,5 +90,11 @@ class ZkClient: |
|
|
|
schedule.enter(0, 0, self.restart_server, (inc,)) |
|
|
|
schedule.enter(0, 0, self.restart_server, (inc,)) |
|
|
|
schedule.run() |
|
|
|
schedule.run() |
|
|
|
if __name__ == '__main__':
    # Four positional arguments are read below (sys.argv[1] .. sys.argv[4]),
    # so together with the script name at least five entries are required.
    if len(sys.argv) < 5:
        print('please input install_path,zookeepers,masters_zk_path and worker_zk_path')
        # Stop instead of falling through to an IndexError on the lookups below.
        sys.exit(1)

    # Kept as module-level names on purpose: ZkClient reads them as globals.
    install_path = sys.argv[1]
    zookeepers = sys.argv[2]
    masters_zk_path = sys.argv[3]
    workers_zk_path = sys.argv[4]

    zkClient = ZkClient()
    # 300 is presumably the interval in seconds between checks -- confirm
    # against ZkClient.main.
    zkClient.main(300)
|
|
|