#!/usr/bin/env python
# -*- coding:utf-8 -*-
'''
Prerequisites:
1. Install pip:
   yum -y install python-pip
2. Install kazoo with pip:
   pip install kazoo
   or
3. Install kazoo with conda:
   conda install -c conda-forge kazoo
Run the script (parameters are described below):
nohup python -u monitor_server.py /data1_1T/dolphinscheduler 192.168.xx.xx:2181,192.168.xx.xx:2181,192.168.xx.xx:2181 /dolphinscheduler/masters /dolphinscheduler/workers > monitor_server.log 2>&1 &
The parameters are as follows:
/data1_1T/dolphinscheduler : the value comes from installPath in install.sh
192.168.xx.xx:2181,192.168.xx.xx:2181,192.168.xx.xx:2181 : the value comes from zkQuorum in install.sh
/dolphinscheduler/masters : the value comes from zkMasters in install.sh
/dolphinscheduler/workers : the value comes from zkWorkers in install.sh
'''
import sys
import socket
import os
import sched
import time
from datetime import datetime
from kazoo . client import KazooClient
# Shared module-level scheduler: wall-clock time for timestamps, blocking
# sleep for waiting between events.
schedule = sched.scheduler(timefunc=time.time, delayfunc=time.sleep)
class ZkClient:
    """Watchdog that restarts master/worker servers missing from ZooKeeper.

    The methods read the module-level names ``zookeepers``, ``install_path``,
    ``masters_zk_path`` and ``workers_zk_path`` and re-arm themselves on the
    module-level ``schedule``; all of these must be defined before use.
    """

    def __init__(self):
        """Connect to the ZooKeeper ensemble named by the global ``zookeepers``."""
        # hosts configuration zk address cluster
        self.zk = KazooClient(hosts=zookeepers)
        self.zk.start()

    def read_file(self, path):
        """Read a ``key=value`` configuration file into a dictionary.

        Args:
            path: path of the file to read.

        Returns:
            Dict mapping each key to its value. Lines that do not split into
            exactly two parts on ``=`` (blank lines, lines without ``=``,
            values that themselves contain ``=``) are ignored.
        """
        config = {}
        with open(path, 'r') as f:
            for line in f:
                arr = line.strip().split('=')
                if len(arr) == 2:
                    config[arr[0]] = arr[1]
        return config

    def get_ip_by_hostname(self, hostname):
        """Return the IPv4 address that *hostname* resolves to."""
        return socket.gethostbyname(hostname)

    def _configured_ips(self, config_dict, key):
        """Return the resolved IPs of the comma-separated hosts under *key*."""
        raw = config_dict.get(key)
        if not raw:
            # A missing entry used to crash with AttributeError on None.
            print('no ' + key + ' configured')
            return []
        hosts = [host.strip() for host in raw.split(',') if host.strip()]
        print(hosts)
        return [self.get_ip_by_hostname(host) for host in hosts]

    def _missing_ips(self, expected_ips, zk_nodes):
        """Return, sorted, the expected IPs that have no child node in ZooKeeper."""
        # The part of a child node name before the first '_' is compared
        # against the configured IPs.
        registered = {node.split('_')[0] for node in zk_nodes}
        return sorted(set(expected_ips) - registered)

    def _restart_missing(self, role, expected_ips, zk_path):
        """Start the *role* server on every expected host absent under *zk_path*."""
        # Nothing is restarted when the parent path itself does not exist.
        if not self.zk.exists(zk_path):
            return
        for ip in self._missing_ips(expected_ips, self.zk.get_children(zk_path)):
            print(role + " " + ip + " server has down")
            # NOTE(review): the command is assembled by string concatenation
            # from install_path and the run config; both are assumed trusted.
            os.system('ssh ' + ip + ' sh ' + install_path
                      + '/bin/dolphinscheduler-daemon.sh start ' + role + '-server')

    def restart_server(self, inc):
        """Run one check and schedule the next one *inc* seconds later.

        Reads ``<install_path>/conf/config/run_config.conf``, resolves the
        configured ``masters`` and ``workers`` to IPs, and starts the matching
        daemon over ssh on every host that has no child node under the
        corresponding ZooKeeper path.

        Args:
            inc: delay in seconds before the next check.
        """
        try:
            config_dict = self.read_file(install_path + '/conf/config/run_config.conf')
            master_list = self._configured_ips(config_dict, 'masters')
            worker_list = self._configured_ips(config_dict, 'workers')
            self._restart_missing('master', master_list, masters_zk_path)
            self._restart_missing('worker', worker_list, workers_zk_path)
        except Exception as exc:
            # Scheduler-callback boundary: report the failure but keep the
            # watchdog alive, otherwise one transient error (DNS, ZooKeeper,
            # unreadable config) would stop all future checks.
            print("server check failed: " + str(exc))
        print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
        schedule.enter(inc, 0, self.restart_server, (inc,))

    def main(self, inc=60):
        """Start the periodic check loop; blocks while events are queued.

        Args:
            inc: seconds between two consecutive checks (default 60).
        """
        # enter() takes: delay, priority (order among events due at the same
        # time), the function to call, and its arguments as a tuple.
        schedule.enter(0, 0, self.restart_server, (inc,))
        schedule.run()
def parse_cli_args(argv):
    """Extract the four required command-line parameters.

    Args:
        argv: full argument vector with the program name first
            (normally ``sys.argv``).

    Returns:
        Tuple ``(install_path, zookeepers, masters_zk_path, workers_zk_path)``
        taken from ``argv[1:5]``; any further arguments are ignored.

    Raises:
        SystemExit: with status 1, after printing a usage hint, when fewer
            than four parameters were supplied.
    """
    # argv[0] is the script name, so four parameters require len(argv) >= 5.
    if len(argv) < 5:
        print('please input install_path,zookeepers,masters_zk_path and worker_zk_path')
        sys.exit(1)
    return tuple(argv[1:5])


if __name__ == '__main__':
    # These stay module-level names: ZkClient reads them as globals.
    install_path, zookeepers, masters_zk_path, workers_zk_path = parse_cli_args(sys.argv)
    zkClient = ZkClient()
    # Run a check every 300 seconds.
    zkClient.main(300)