#coding=utf-8
"""Restart helper for TSG application processes.

Stops a running application (plus any companion programs), starts it again
and reports the outcome through syslog and stdout.  The process exit code is
0 on success and non-zero on any failure.
"""
import os
import sys
import syslog
import subprocess
import time
import re

##define KERN_EMERG   "<0>" /* system is unusable */
##define KERN_ALERT   "<1>" /* action must be taken immediately */
##define KERN_CRIT    "<2>" /* critical conditions */
##define KERN_ERR     "<3>" /* error conditions */
##define KERN_WARNING "<4>" /* warning conditions */
##define KERN_NOTICE  "<5>" /* normal but significant condition */
##define KERN_INFO    "<6>" /* informational */
##define KERN_DEBUG   "<7>" /* debug-level messages */
# Indexed by the numeric syslog priority (syslog.LOG_EMERG .. syslog.LOG_DEBUG).
MSG_PREFIX = ['EMERG', 'ALERT', 'CRIT', 'ERR', 'WARNING', 'NOTICE', 'INFO', 'DEBUG']

# Identity string passed to syslog.openlog().
G_LOCAL_NODE_NAME = "TSG_MXN"

# Commands system_cmd_run() refuses to execute.  The match is a prefix match
# on the command string (after leading whitespace), so e.g. "rmdir" is
# rejected as well.
_DANGEROUS_CMD_RE = re.compile(r"\s*(?:rm|mv|poweroff|shutdown)")

# How often a stop is attempted, and how long to wait between attempts so a
# killed process has time to disappear from the process table.
_STOP_RETRIES = 3
_STOP_RETRY_DELAY_SEC = 1


class CommandException(Exception):
    """Error type reserved for command failures (not raised in this file)."""
    pass


def tsg_restart_err_log(error_num, user_msg):
    """Log *user_msg* and a generic failure line, then exit the process.

    Both lines are written to syslog at LOG_ERR priority and printed to
    stdout.  This function does not return: it calls sys.exit(error_num).
    """
    for text in (user_msg, "tsg software reboot error"):
        msg = "[%s] %s" % (MSG_PREFIX[syslog.LOG_ERR], text)
        syslog.syslog(syslog.LOG_ERR, msg)
        print(msg)
    sys.exit(error_num)


def tsg_restart_succ_log():
    """Log the success line to syslog (LOG_NOTICE) and stdout, then exit 0."""
    msg = "[%s] %s" % (MSG_PREFIX[syslog.LOG_NOTICE], "tsg software reboot success")
    syslog.syslog(syslog.LOG_NOTICE, msg)
    print(msg)
    sys.exit(0)


def system_cmd_run(cmd_str):
    """Run *cmd_str* through the shell and return ``(exitcode, output)``.

    Returns:
        (0, output) on success, (non-zero exit code, output) when the command
        fails, or (1, error text) when the command could not be run at all.

    Exits the process with status 1 if *cmd_str* starts with one of the
    commands rejected by ``_DANGEROUS_CMD_RE``.
    """
    if _DANGEROUS_CMD_RE.match(cmd_str) is not None:
        print("can't run this cmd:%s" % (cmd_str))
        sys.exit(1)

    try:
        exitcode, output = subprocess.getstatusoutput(cmd_str)
    except Exception as exc:
        # Python 3 exceptions have no ``.message`` attribute; use str(exc).
        err_text = str(exc)
        print(exc)
        print("###### %s" % (err_text))
        return 1, err_text
    return exitcode, output


def tsg_check_process_health_by_ps(module_name):
    """Check via ``ps | grep`` whether a process matching *module_name* exists.

    Returns:
        1 if a matching process line is found, 0 otherwise.
    """
    cmd_str = "ps -afx | grep %s | grep -v grep" % (module_name)
    exitcode, _output = system_cmd_run(cmd_str)
    # grep exits 0 only when at least one line matched.
    return 1 if exitcode == 0 else 0


def tsg_check_process_health_by_systemctl_status(module_name):
    """Placeholder for a systemctl based check; not implemented, exits 1."""
    print("systemctl_status check method TODO!")
    sys.exit(1)


def tsg_kill_app_process_by_killall(module_name, extra_progs):
    """Send SIGKILL to *module_name* and *extra_progs* using ``killall -9``.

    Returns:
        0 if killall reported success, 1 if it failed or could not be run.
    """
    # TODO: stop sapp gracefully; the process may be a zombie or slow to exit.
    command = "killall -9 %s %s" % (module_name, extra_progs)
    try:
        exitcode, _output = subprocess.getstatusoutput(command)
    except Exception as exc:
        # Previously swallowed, which left ``exitcode`` unbound below.
        print("%s" % (exc))
        return 1
    return 1 if exitcode != 0 else 0


def tsg_kill_app_process_by_systemctl_stop(module_name, extra_progs=None):
    """Placeholder for a systemctl based stop; not implemented, exits 1.

    *extra_progs* is accepted (and ignored) so the signature matches the way
    tsg_stop_app_process() invokes every stop function.
    """
    print("TODO")
    sys.exit(1)


def _resolve_check_func(check_method):
    """Return the health-check function for *check_method*, or None."""
    if check_method == 'ps':
        return tsg_check_process_health_by_ps
    if check_method == 'systemctl_status':
        return tsg_check_process_health_by_systemctl_status
    return None


def tsg_stop_app_process(module_name, extra_progs, stop_method, check_method):
    """Stop *module_name* (and *extra_progs*) and verify that it is gone.

    Args:
        module_name: name of the process to stop.
        extra_progs: additional program names handed to the stop function.
        stop_method: 'killall' or 'systemctl_stop'.
        check_method: 'ps' or 'systemctl_status'.

    Returns:
        0 when the process is not running (either initially or after the
        stop).  Exits the process with a non-zero status when a method name
        is unsupported or the process is still running after all retries.
    """
    check_func = _resolve_check_func(check_method)
    if check_func is None:
        print("not support check method:%s" % (check_method))
        sys.exit(1)

    if check_func(module_name) == 0:
        # Nothing to stop.
        return 0

    if stop_method == 'killall':
        stop_func = tsg_kill_app_process_by_killall
    elif stop_method == 'systemctl_stop':
        stop_func = tsg_kill_app_process_by_systemctl_stop
    else:
        print("not support stop method:%s" % (stop_method))
        sys.exit(1)

    res_code = 0
    for attempt in range(_STOP_RETRIES):
        # The return value of stop_func is deliberately ignored: the program
        # or its watchdog may not exist, etc.  Success is judged solely by
        # re-running the health check.
        stop_func(module_name, extra_progs)
        res_code = check_func(module_name)
        if res_code == 0:
            break
        if attempt + 1 < _STOP_RETRIES:
            # Give the killed process a moment to vanish before retrying.
            time.sleep(_STOP_RETRY_DELAY_SEC)

    if res_code != 0:
        errmsg = "can't stop process %s" % (module_name)
        tsg_restart_err_log(res_code, errmsg)
    return res_code


def tsg_start_app_process_by_exec_call(module_name, module_cwd, module_exe, check_method):
    """Start the application by running ``./module_exe`` inside *module_cwd*.

    Returns:
        0 when the launcher succeeded and the health check sees the process;
        1 when *check_method* is unsupported or *module_cwd* cannot be
        entered.  Exits the process with a non-zero status (via
        tsg_restart_err_log) when the launcher fails or the process is not
        running afterwards.
    """
    # Validate the check method first so nothing is launched when the result
    # could not be verified anyway.
    check_func = _resolve_check_func(check_method)
    if check_func is None:
        print("not support check method:%s" % (check_method))
        return 1

    try:
        os.chdir(module_cwd)
    except OSError as exc:
        print("%s" % (exc))
        return 1

    # NOTE(review): this call blocks until the launcher exits; presumably the
    # launcher puts the application into the background - confirm.
    cmd_str = "./%s" % (module_exe)
    ret_code, _output = system_cmd_run(cmd_str)
    if ret_code != 0:
        errmsg = "start program %s error, call %s/%s failed" % (module_name, module_cwd, module_exe)
        tsg_restart_err_log(ret_code, errmsg)

    if check_func(module_name) == 0:
        errmsg = "start process %s error" % (module_name)
        # ret_code is 0 at this point; exit with 1 so the failure is visible
        # in the exit status.
        tsg_restart_err_log(1, errmsg)
    return 0


def tsg_start_app_process_by_systemctl_start(module_name, module_cwd, module_exe, check_method):
    """Placeholder for a systemctl based start; not implemented, exits 1."""
    print("TODO")
    sys.exit(1)


def tsg_start_app_process(module_name, module_cwd, module_exe, start_method, check_method):
    """Start the application using *start_method*.

    Returns:
        0 on success.  Exits the process with status 1 when *start_method* is
        unsupported or the selected start function reports a failure.
    """
    if start_method == 'exec_call':
        start_func = tsg_start_app_process_by_exec_call
    elif start_method == 'systemctl_start':
        start_func = tsg_start_app_process_by_systemctl_start
    else:
        print("not support start method:%s" % (start_method))
        sys.exit(1)

    if start_func(module_name, module_cwd, module_exe, check_method) != 0:
        sys.exit(1)
    return 0


def tsg_restart_app_process(module_name, extra_progs, module_cwd, module_exe,
                            stop_method, start_method, check_method):
    """Stop and then start one application.

    Args:
        module_name: name of the module (process) to restart.
        extra_progs: additional programs to kill, e.g. sapp's r3 watchdog,
            which must be killed or it may start sapp again in the background.
        module_cwd: absolute path of the application directory.
        module_exe: name of the executable that launches the application; may
            differ from module_name (e.g. sapp is launched through r2).
        stop_method: how to stop the application.
        start_method: how to start the application.
        check_method: how to check whether the application is running.

    Returns:
        0 on success, otherwise the non-zero code of the failing step.
    """
    res_code = tsg_stop_app_process(module_name, extra_progs, stop_method, check_method)
    if res_code != 0:
        return res_code
    return tsg_start_app_process(module_name, module_cwd, module_exe, start_method, check_method)


def tsg_software_reboot():
    """Restart the TSG application set and exit with the overall status."""
    syslog.openlog(G_LOCAL_NODE_NAME)

    res_code = tsg_restart_app_process("sapp", "r3", "/home/tsg/kni", "r2",
                                       "killall", "exec_call", "ps")
    if res_code != 0:
        tsg_restart_err_log(res_code, "restart process sapp error")

    # TODO: restart telegraf, marsio and influxd as well once the
    # systemctl_stop / systemctl_start / systemctl_status methods exist.

    tsg_restart_succ_log()


if __name__ == '__main__':
    tsg_software_reboot()