231 lines
7.4 KiB
Python
231 lines
7.4 KiB
Python
#coding=utf-8
|
|
import os
|
|
import sys
|
|
import syslog
|
|
import subprocess
|
|
import time
|
|
import re
|
|
|
|
##define KERN_EMERG "<0>" /* system is unusable */
|
|
##define KERN_ALERT "<1>" /* action must be taken immediately */
|
|
##define KERN_CRIT "<2>" /* critical conditions */
|
|
##define KERN_ERR "<3>" /* error conditions */
|
|
##define KERN_WARNING "<4>" /* warning conditions */
|
|
##define KERN_NOTICE "<5>" /* normal but significant condition */
|
|
##define KERN_INFO "<6>" /* informational */
|
|
##define KERN_DEBUG "<7>" /* debug-level messages */
|
|
|
|
# Human-readable severity tags, indexed by syslog priority number
# (0 = EMERG ... 7 = DEBUG); looked up as MSG_PREFIX[syslog.LOG_ERR] etc.
MSG_PREFIX = ['EMERG', 'ALERT', 'CRIT', 'ERR', 'WARNING', 'NOTICE', 'INFO', 'DEBUG']
|
|
|
|
# Identifier handed to syslog.openlog() when the restart sequence begins.
G_LOCAL_NODE_NAME = "TSG_MXN"
|
|
|
|
class CommandException(Exception):
    """Exception type declared for command-related failures.

    NOTE(review): nothing in the visible part of this file raises or
    catches it -- confirm whether it is still needed.
    """
|
|
|
|
def tsg_restart_err_log(error_num, user_msg):
    """Report a failed restart to syslog and stdout, then terminate.

    Two LOG_ERR lines are emitted, in this order: the caller's message and
    the fixed "tsg software reboot error" summary. Each line is sent to
    syslog first and then printed.

    Args:
        error_num: exit status passed to sys.exit().
        user_msg: caller-supplied description of what went wrong.

    Raises:
        SystemExit: always, carrying ``error_num``.
    """
    severity = syslog.LOG_ERR
    tag = MSG_PREFIX[severity]

    for text in (user_msg, "tsg software reboot error"):
        line = "[%s] %s" % (tag, text)
        syslog.syslog(severity, line)
        print(line)

    sys.exit(error_num)
|
|
|
|
def tsg_restart_succ_log():
    """Record a successful restart in syslog and on stdout, then exit 0.

    Raises:
        SystemExit: always, with status 0.
    """
    severity = syslog.LOG_NOTICE
    line = "[%s] %s" % (MSG_PREFIX[severity], "tsg software reboot success")

    syslog.syslog(severity, line)
    print(line)
    sys.exit(0)
|
|
|
|
def system_cmd_run(cmd_str):
    """Run *cmd_str* through the shell and return ``(exitcode, output)``.

    Args:
        cmd_str: shell command line, executed with
            subprocess.getstatusoutput().

    Returns:
        A 2-tuple ``(exitcode, output)``:
          * exitcode 0 -- the command succeeded; output is its captured text.
          * non-zero   -- the command's own exit status, or 1 when the
            command could not be launched at all (output is then the text
            of the exception).

    Raises:
        SystemExit: with status 1 when *cmd_str* starts with one of the
            refused commands (rm, mv, poweroff, shutdown).
    """
    dangerous_cmd = ("rm", "mv", "poweroff", "shutdown")

    for cmd in dangerous_cmd:
        # Prefix match after optional leading whitespace: anything that
        # merely *starts* with one of these words (e.g. "rmdir") is refused
        # as well. A raw string avoids the invalid "\s" escape warning.
        if re.match(r"\s*%s" % (cmd), cmd_str) is not None:
            print("can't run this cmd:%s" % (cmd_str))
            sys.exit(1)

    try:
        exitcode, output = subprocess.getstatusoutput(cmd_str)
    except Exception as e:
        # Python 3 exceptions have no ``.message`` attribute; reading it here
        # used to raise AttributeError and hide the real failure.
        err_text = str(e)
        print(e)
        print("###### %s" % (err_text))
        return 1, err_text

    return exitcode, output
|
|
|
|
def tsg_check_process_health_by_ps(module_name):
    """Check via ``ps`` whether a process matching *module_name* exists.

    The pipeline ``ps -afx | grep <module_name> | grep -v grep`` is run
    through system_cmd_run(); only its exit status is used.

    Returns:
        1 -- the pipeline exited with status 0 (process exists).
        0 -- any other exit status (process does not exist).
    """
    pipeline = "ps -afx | grep %s | grep -v grep" % (module_name)
    status, _listing = system_cmd_run(pipeline)
    return 1 if status == 0 else 0
|
|
|
|
def tsg_check_process_health_by_systemctl_status(module_name):
    """Placeholder for a ``systemctl status`` based liveness check.

    Intended contract (from the original header comment): return 1 when the
    process exists and 0 when it does not. The check is not implemented yet:
    the function prints a notice and terminates the script.

    Raises:
        SystemExit: always, with status 1.
    """
    print("systemctl_status check method TODO!")
    raise SystemExit(1)
|
|
|
|
def tsg_kill_app_process_by_killall(module_name, extra_progs):
    """Force-kill *module_name* and *extra_progs* with ``killall -9``.

    Args:
        module_name: name of the main program to kill.
        extra_progs: additional program name(s) appended to the same
            killall command line.

    Returns:
        0 when killall exited with status 0, otherwise 1 (including the
        case where the command could not be launched at all).
    """
    # TODO: stop sapp etc. gracefully and verify the process is really gone;
    # it may be a zombie or may take a long time to exit.
    command = "killall -9 %s %s" % (module_name, extra_progs)

    try:
        exitcode, _output = subprocess.getstatusoutput(command)
    except Exception as e:
        # The exception used to be swallowed, which left ``exitcode`` unbound
        # and turned the check below into an UnboundLocalError.
        print(e)
        return 1

    return 0 if exitcode == 0 else 1
|
|
|
|
def tsg_kill_app_process_by_systemctl_stop(module_name, extra_progs=None):
    """Placeholder for stopping a service with ``systemctl stop``.

    Args:
        module_name: name of the service/program to stop.
        extra_progs: accepted (and currently ignored) so this function can be
            called with the same ``(module_name, extra_progs)`` arguments as
            the killall-based stop function; previously the two-argument call
            made by tsg_stop_app_process raised TypeError.

    Raises:
        SystemExit: always, with status 1 -- the method is not implemented.
    """
    print("TODO")
    sys.exit(1)
|
|
|
|
def tsg_stop_app_process(module_name, extra_progs, stop_method, check_method):
    """Stop *module_name* (and *extra_progs*) if it is currently running.

    Args:
        module_name: name of the program to stop.
        extra_progs: additional program name(s) handed to the stop function.
        stop_method: 'killall' or 'systemctl_stop'.
        check_method: 'ps' or 'systemctl_status'; used both to see whether
            anything is running and to verify that the stop worked.

    Returns:
        0 when the process was not running or was stopped successfully.

    Raises:
        SystemExit: status 1 for an unsupported check/stop method, or (via
            tsg_restart_err_log) when the process is still alive after three
            stop attempts.
    """
    if check_method == 'ps':
        check_func = tsg_check_process_health_by_ps
    elif check_method == 'systemctl_status':
        check_func = tsg_check_process_health_by_systemctl_status
    else:
        print("not support check method:%s" % (check_method))
        sys.exit(1)

    if check_func(module_name) == 0:
        # Nothing is running, so there is nothing to stop.
        return 0

    if stop_method == 'killall':
        stop_func = tsg_kill_app_process_by_killall
    elif stop_method == 'systemctl_stop':
        stop_func = tsg_kill_app_process_by_systemctl_stop
    else:
        # Report the offending *stop* method (the check method was printed
        # here by mistake before).
        print("not support stop method:%s" % (stop_method))
        sys.exit(1)

    # Ready to stop the process; retry up to 3 times.
    max_attempts = 3
    res_code = 1
    for attempt in range(max_attempts):
        # The stop function's return value is deliberately ignored: the
        # program or its guardian may simply not exist. Success is judged
        # only by re-running the selected check.
        stop_func(module_name, extra_progs)
        res_code = check_func(module_name)
        if res_code == 0:
            break
        if attempt < max_attempts - 1:
            # Give the signal time to take effect before trying again;
            # back-to-back retries would all observe the same dying process.
            time.sleep(1)

    if res_code != 0:
        errmsg = "can't stop process %s" % (module_name)
        tsg_restart_err_log(res_code, errmsg)

    return res_code
|
|
|
|
def tsg_start_app_process_by_exec_call(module_name, module_cwd, module_exe, check_method):
    """Start a program by running ``./<module_exe>`` inside *module_cwd*.

    Args:
        module_name: program name used for the post-start liveness check and
            in error messages.
        module_cwd: directory to change into before launching. Note that the
            working directory of this script stays changed afterwards.
        module_exe: executable (relative to *module_cwd*) to run.
        check_method: 'ps' or 'systemctl_status'.

    Returns:
        0 on success; 1 when *module_cwd* cannot be entered or
        *check_method* is not supported.

    Raises:
        SystemExit: via tsg_restart_err_log when the launch command fails
            (its exit status is used) or when the program is not running
            afterwards (status 1).
    """
    try:
        os.chdir(module_cwd)
    except Exception as e:
        print("%s" % (e))
        return 1

    cmd_str = "./%s" % (module_exe)
    ret_code, _output = system_cmd_run(cmd_str)
    if ret_code != 0:
        errmsg = "start program %s error, call %s/%s failed" % (module_name, module_cwd, module_exe)
        tsg_restart_err_log(ret_code, errmsg)

    if check_method == 'ps':
        check_func = tsg_check_process_health_by_ps
    elif check_method == 'systemctl_status':
        check_func = tsg_check_process_health_by_systemctl_status
    else:
        print("not support check method:%s" % (check_method))
        return 1

    if check_func(module_name) == 0:
        errmsg = "start process %s error" % (module_name)
        # ret_code is necessarily 0 at this point; passing it on would make
        # the script exit with a *success* status on a failed start.
        tsg_restart_err_log(1, errmsg)

    return 0
|
|
|
|
def tsg_start_app_process_by_systemctl_start(module_name, module_cwd, module_exe, check_method):
    """Placeholder for starting a service with ``systemctl start``.

    Takes the same parameters as the exec-call based starter; none of them
    is used yet.

    Raises:
        SystemExit: always, with status 1 -- the method is not implemented.
    """
    print("TODO")
    raise SystemExit(1)
|
|
|
|
def tsg_start_app_process(module_name, module_cwd, module_exe, start_method, check_method):
    """Start a program with the launcher selected by *start_method*.

    Args:
        module_name: program name, forwarded to the launcher.
        module_cwd: working directory, forwarded to the launcher.
        module_exe: executable name, forwarded to the launcher.
        start_method: 'exec_call' or 'systemctl_start'.
        check_method: liveness-check method, forwarded to the launcher.

    Returns:
        0 when the launcher reports success.

    Raises:
        SystemExit: status 1 when *start_method* is unsupported or the
            launcher returns a non-zero value.
    """
    if start_method == 'exec_call':
        launcher = tsg_start_app_process_by_exec_call
    elif start_method == 'systemctl_start':
        launcher = tsg_start_app_process_by_systemctl_start
    else:
        print("not support start method:%s" % (start_method))
        sys.exit(1)

    status = launcher(module_name, module_cwd, module_exe, check_method)
    if status == 0:
        return 0

    sys.exit(1)
|
|
|
|
def tsg_restart_app_process(module_name, extra_progs, module_cwd, module_exe, stop_method, start_method, check_method):
    """Restart one application: stop it, then start it again.

    Args:
        module_name: name of the module.
        extra_progs: additional programs that must be killed as well, e.g.
            sapp's r3 guardian; otherwise sapp may get started again in the
            background.
        module_cwd: absolute path of the application.
        module_exe: name of the executable used to start the application;
            may differ from module_name (e.g. sapp is started through r2).
        stop_method: how to stop the application.
        start_method: how to start the application.
        check_method: how to check whether the application is running.

    Returns:
        The first non-zero code returned by the stop or start step,
        otherwise 0.
    """
    stop_status = tsg_stop_app_process(module_name, extra_progs, stop_method, check_method)
    if stop_status != 0:
        return stop_status

    start_status = tsg_start_app_process(module_name, module_cwd, module_exe, start_method, check_method)
    if start_status != 0:
        return start_status

    return 0
|
|
|
|
def tsg_software_reboot():
    """Restart the TSG software stack and log the outcome.

    Opens syslog under G_LOCAL_NODE_NAME, restarts sapp (killing its r3
    companion, launching it through r2 from /home/tsg/kni, verifying with
    ps) and finally reports success.

    Raises:
        SystemExit: always -- tsg_restart_succ_log() ends the script with
            status 0 on success.
    """
    # The node name is fixed for now; a get_local_node_name() lookup was
    # sketched in the original code but is disabled.
    syslog.openlog(G_LOCAL_NODE_NAME)

    tsg_restart_app_process(
        module_name="sapp",
        extra_progs="r3",
        module_cwd="/home/tsg/kni",
        module_exe="r2",
        stop_method="killall",
        start_method="exec_call",
        check_method="ps",
    )

    # TODO: telegraf, marsio and influxd restarts (via systemctl_stop /
    # systemctl_start / systemctl_status) are not enabled yet.

    tsg_restart_succ_log()
|
|
|
|
# Script entry point: run the restart sequence only when this file is
# executed directly, not when it is imported.
if __name__ == '__main__':
    tsg_software_reboot()
|