This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files
lijia-tsg-oam/py_cmd/tsg_software_reboot.py
2019-08-08 19:12:32 +08:00

231 lines
7.4 KiB
Python

#coding=utf-8
import os
import sys
import syslog
import subprocess
import time
import re
# Severity labels, one per kernel/syslog priority level. The list position is
# the numeric priority, so the table is indexed with the syslog.LOG_*
# constants (e.g. MSG_PREFIX[syslog.LOG_ERR]).
MSG_PREFIX = [
    'EMERG',    # <0> KERN_EMERG:   system is unusable
    'ALERT',    # <1> KERN_ALERT:   action must be taken immediately
    'CRIT',     # <2> KERN_CRIT:    critical conditions
    'ERR',      # <3> KERN_ERR:     error conditions
    'WARNING',  # <4> KERN_WARNING: warning conditions
    'NOTICE',   # <5> KERN_NOTICE:  normal but significant condition
    'INFO',     # <6> KERN_INFO:    informational
    'DEBUG',    # <7> KERN_DEBUG:   debug-level messages
]

# Identifier passed to syslog.openlog() by tsg_software_reboot().
G_LOCAL_NODE_NAME = "TSG_MXN"
class CommandException(Exception):
    """Exception subclass that adds no behavior of its own.

    Nothing in the visible part of this file raises or catches it.
    """
def tsg_restart_err_log(error_num, user_msg):
    """Report a failed software reboot and terminate the process.

    Two lines are emitted, each to syslog at LOG_ERR and to stdout: first the
    caller's message, then the fixed "tsg software reboot error" line.

    Args:
        error_num: Status handed to ``sys.exit``.
        user_msg: Caller-supplied description of what went wrong.

    Raises:
        SystemExit: Always, with ``error_num`` as the exit status.
    """
    severity = syslog.LOG_ERR
    label = MSG_PREFIX[severity]
    for text in (user_msg, "tsg software reboot error"):
        line = "[%s] %s" % (label, text)
        syslog.syslog(severity, line)
        print(line)
    sys.exit(error_num)
def tsg_restart_succ_log():
    """Report a successful software reboot and terminate the process.

    The "tsg software reboot success" line is written to syslog at LOG_NOTICE
    and echoed to stdout.

    Raises:
        SystemExit: Always, with exit status 0.
    """
    severity = syslog.LOG_NOTICE
    line = "[%s] %s" % (MSG_PREFIX[severity], "tsg software reboot success")
    syslog.syslog(severity, line)
    print(line)
    sys.exit(0)
def system_cmd_run(cmd_str):
    """Run a shell command, refusing a small set of destructive commands.

    Args:
        cmd_str: Command line executed through ``subprocess.getstatusoutput``
            (i.e. by a shell).

    Returns:
        A ``(exitcode, output)`` tuple. ``exitcode`` is 0 on success and
        non-zero on error. If the command could not be launched at all, the
        result is ``(1, <text of the exception>)``.

    Raises:
        SystemExit: With status 1 when ``cmd_str`` begins (after optional
            leading whitespace) with ``rm``, ``mv``, ``poweroff`` or
            ``shutdown``.
    """
    # Prefix match only: this rejects any command *starting* with one of the
    # keywords (so "rmdir" is rejected too) and does not inspect the rest of
    # the command line.
    if re.match(r"\s*(?:rm|mv|poweroff|shutdown)", cmd_str) is not None:
        print("can't run this cmd:%s" % (cmd_str))
        sys.exit(1)

    try:
        return subprocess.getstatusoutput(cmd_str)
    except Exception as e:
        # Python 3 exceptions have no ``.message`` attribute; reading it here
        # used to raise AttributeError from inside the handler. Use str(e).
        err_text = str(e)
        print(e)
        print("###### %s" % (err_text))
        return 1, err_text
def tsg_check_process_health_by_ps(module_name):
    """Check via ``ps`` whether a process matching ``module_name`` exists.

    Runs ``ps -afx | grep <module_name> | grep -v grep`` through
    ``system_cmd_run`` and looks only at the pipeline's exit code.

    Args:
        module_name: Text to grep for in the ``ps`` listing.

    Returns:
        1 if the pipeline exited with 0 (a matching process exists),
        0 otherwise (no matching process).
    """
    pipeline = "ps -afx | grep %s | grep -v grep" % (module_name)
    status, _unused_output = system_cmd_run(pipeline)
    return 1 if status == 0 else 0
def tsg_check_process_health_by_systemctl_status(module_name):
    """Placeholder for a systemctl-based process check (not implemented).

    The intended contract, per the original header comment, is to return 1
    when the process exists and 0 when it does not. For now it only announces
    that the method is a TODO and aborts.

    Args:
        module_name: Unused.

    Raises:
        SystemExit: Always, with exit status 1.
    """
    print("systemctl_status check method TODO!")
    raise SystemExit(1)
def tsg_kill_app_process_by_killall(module_name, extra_progs):
    """Send SIGKILL to a module and its companion programs with ``killall -9``.

    Args:
        module_name: Process name to kill.
        extra_progs: Additional process name(s) appended to the same
            ``killall`` command line.

    Returns:
        0 if ``killall`` exited with status 0, 1 if it exited non-zero or
        could not be run at all.
    """
    # TODO: after stopping (sapp, ...), check whether the process still exists;
    # it may be a zombie or may be very slow to exit.
    command = "killall -9 %s %s" % (module_name, extra_progs)
    try:
        exitcode, _output = subprocess.getstatusoutput(command)
    except Exception as e:
        # Previously the exception was swallowed and ``exitcode`` was left
        # unbound, so the comparison below raised UnboundLocalError.
        print("run '%s' failed: %s" % (command, e))
        return 1
    return 1 if exitcode != 0 else 0
def tsg_kill_app_process_by_systemctl_stop(module_name, extra_progs=None):
    """Placeholder for stopping a module via systemctl (not implemented).

    Args:
        module_name: Unused.
        extra_progs: Unused. Accepted (with a default) so this function can be
            called the same way as ``tsg_kill_app_process_by_killall``, which
            is how ``tsg_stop_app_process`` invokes its stop function.

    Raises:
        SystemExit: Always, with exit status 1.
    """
    print("TODO")
    sys.exit(1)
def tsg_stop_app_process(module_name, extra_progs, stop_method, check_method):
    """Stop a module if it is running, retrying up to three times.

    Args:
        module_name: Name of the module to stop.
        extra_progs: Extra program name(s) passed to the stop function along
            with ``module_name``.
        stop_method: ``'killall'`` or ``'systemctl_stop'``.
        check_method: ``'ps'`` or ``'systemctl_status'``.

    Returns:
        0 when the module was not running or was stopped successfully.

    Raises:
        SystemExit: With status 1 for an unsupported check or stop method, or
            (through ``tsg_restart_err_log``) when the module is still running
            after three stop attempts.
    """
    if check_method == 'ps':
        check_func = tsg_check_process_health_by_ps
    elif check_method == 'systemctl_status':
        check_func = tsg_check_process_health_by_systemctl_status
    else:
        print("not support check method:%s" % (check_method))
        sys.exit(1)

    # Nothing to stop if the module is not running.
    if check_func(module_name) == 0:
        return 0

    if stop_method == 'killall':
        stop_func = tsg_kill_app_process_by_killall
    elif stop_method == 'systemctl_stop':
        stop_func = tsg_kill_app_process_by_systemctl_stop
    else:
        # Report the offending stop method (the message used to print
        # check_method here).
        print("not support stop method:%s" % (stop_method))
        sys.exit(1)

    # Ready to stop the process; try up to 3 times.
    # NOTE(review): there is no delay between attempts - confirm whether a
    # short wait is needed for the killed process to disappear.
    res_code = 0
    for _attempt in range(3):
        # The return value of stop_func is deliberately ignored: the program
        # or its watchdog may not exist, among other reasons. Success of the
        # stop is decided solely by re-running the health check.
        stop_func(module_name, extra_progs)
        # Use the check selected by check_method rather than hard-coding ps.
        res_code = check_func(module_name)
        if res_code == 0:
            break

    if res_code != 0:
        errmsg = "can't stop process %s" % (module_name)
        tsg_restart_err_log(res_code, errmsg)
    return res_code
def tsg_start_app_process_by_exec_call(module_name, module_cwd, module_exe, check_method):
    """Start a module by running its launcher from the module's directory.

    Changes the current working directory to ``module_cwd``, runs
    ``./<module_exe>`` through ``system_cmd_run``, then verifies with the
    selected check method that ``module_name`` is running.

    Args:
        module_name: Name used for the health check and in error messages.
        module_cwd: Directory to ``chdir`` into before launching.
        module_exe: Executable (relative to ``module_cwd``) to run.
        check_method: ``'ps'`` or ``'systemctl_status'``.

    Returns:
        0 on success; 1 if ``module_cwd`` cannot be entered or
        ``check_method`` is unsupported.

    Raises:
        SystemExit: Through ``tsg_restart_err_log``, with the launcher's
            non-zero exit code when the launcher fails, or with status 1 when
            the module is not running after the launch.
    """
    try:
        os.chdir(module_cwd)
    except (OSError, TypeError, ValueError) as e:
        print("%s" % (e))
        return 1

    ret_code, _output = system_cmd_run("./%s" % (module_exe))
    if ret_code != 0:
        errmsg = "start program %s error, call %s/%s failed" % (module_name, module_cwd, module_exe)
        tsg_restart_err_log(ret_code, errmsg)

    if check_method == 'ps':
        check_func = tsg_check_process_health_by_ps
    elif check_method == 'systemctl_status':
        check_func = tsg_check_process_health_by_systemctl_status
    else:
        print("not support check method:%s" % (check_method))
        return 1

    if check_func(module_name) == 0:
        errmsg = "start process %s error" % (module_name)
        # ret_code is necessarily 0 at this point, so passing it on would make
        # the script exit with status 0 on a failed start. Exit with 1.
        tsg_restart_err_log(1, errmsg)
    return 0
def tsg_start_app_process_by_systemctl_start(module_name, module_cwd, module_exe, check_method):
    """Placeholder for starting a module via systemctl (not implemented).

    All arguments are unused; the signature matches
    ``tsg_start_app_process_by_exec_call``.

    Raises:
        SystemExit: Always, with exit status 1.
    """
    print("TODO")
    raise SystemExit(1)
def tsg_start_app_process(module_name, module_cwd, module_exe, start_method, check_method):
    """Start a module with the requested start method.

    Args:
        module_name: Name of the module to start.
        module_cwd: Directory passed on to the start function.
        module_exe: Executable name passed on to the start function.
        start_method: ``'exec_call'`` or ``'systemctl_start'``.
        check_method: Health-check method passed on to the start function.

    Returns:
        0 when the start function reported success.

    Raises:
        SystemExit: With status 1 when ``start_method`` is unsupported or the
            start function returned a non-zero value.
    """
    # Reject unknown methods before resolving the implementation.
    if start_method not in ('exec_call', 'systemctl_start'):
        print("not support start method:%s" % (start_method))
        sys.exit(1)

    if start_method == 'exec_call':
        launcher = tsg_start_app_process_by_exec_call
    else:
        launcher = tsg_start_app_process_by_systemctl_start

    status = launcher(module_name, module_cwd, module_exe, check_method)
    if status != 0:
        sys.exit(1)
    return 0
def tsg_restart_app_process(module_name, extra_progs, module_cwd, module_exe, stop_method, start_method, check_method):
    """Restart a module: stop it first, then start it again.

    Args:
        module_name: Name of the module.
        extra_progs: Other programs that must be killed as well, e.g. sapp's
            r3 watchdog; otherwise sapp may be started again in the
            background.
        module_cwd: Absolute path of the application.
        module_exe: Name of the executable that starts the application. It may
            differ from ``module_name``, e.g. sapp is started with r2.
        stop_method: Method used to stop the application.
        start_method: Method used to start the application.
        check_method: Method used to check whether the application is running.

    Returns:
        0 on success, otherwise the non-zero code returned by the stop step
        or the start step.
    """
    stop_status = tsg_stop_app_process(module_name, extra_progs, stop_method, check_method)
    if stop_status != 0:
        return stop_status

    start_status = tsg_start_app_process(module_name, module_cwd, module_exe, start_method, check_method)
    if start_status != 0:
        return start_status
    return 0
def tsg_software_reboot():
    """Restart the TSG software stack and exit with the result.

    Opens syslog with ``G_LOCAL_NODE_NAME`` as the identifier, restarts sapp
    (killing its r3 companion, launching ``./r2`` from /home/tsg/kni, checking
    with ps), then logs success.

    Raises:
        SystemExit: Always. Status 0 via ``tsg_restart_succ_log`` on success;
            non-zero when any restart step fails.
    """
    # G_LOCAL_NODE_NAME = get_local_node_name()
    # syslog.openlog() returns nothing useful, so its result is not kept.
    syslog.openlog(G_LOCAL_NODE_NAME)

    res_code = tsg_restart_app_process("sapp", "r3", "/home/tsg/kni", "r2", "killall", "exec_call", "ps")
    if res_code != 0:
        # Do not report success if the restart returned a failure code.
        tsg_restart_err_log(res_code, "restart process sapp error")

    # TODO: restart telegraf, marsio and influxd as well, using
    # systemctl_stop / systemctl_start / systemctl_status once those methods
    # are implemented.
    tsg_restart_succ_log()
# Run the reboot sequence only when executed as a script, not when imported.
if __name__ == "__main__":
    tsg_software_reboot()