diff --git a/install_config/group_vars/all.yml b/install_config/group_vars/all.yml
index a4c71fe..80a14f6 100644
--- a/install_config/group_vars/all.yml
+++ b/install_config/group_vars/all.yml
@@ -32,7 +32,7 @@ oam:
   mcn0_ip: "192.168.100.1"
 
 tsg_cli:
-  rpm_file_name: "tsg-cli-1.0.19.ecf9c86-1.el7.x86_64.rpm"
+  rpm_file_name: "tsg-cli-1.1.0.32ab89f-1.el7.x86_64.rpm"
 
 ha_master:
   instance_state: MASTER
diff --git a/roles/ha_master/templates/check_service_health.sh.j2 b/roles/ha_master/templates/check_service_health.sh.j2
index 0188ca0..f407c76 100644
--- a/roles/ha_master/templates/check_service_health.sh.j2
+++ b/roles/ha_master/templates/check_service_health.sh.j2
@@ -1,36 +1,46 @@
-#!/bin/bash
+#!/bin/sh
 
 #running:0, stop:3
+
 CONSUL_STATE=0
 OAM_CLUSTER_STATE=0
-INFLUXDB_STATE=0
+INFLUXD_STATE=0
 
 check_consul_state(){
-	CONSUL_STATE=$(systemctl status oam_cluster.service)
-	if [ $CONSUL_STATE -ne 0 ]
-	then
-		return 1
-	else
-		return 0
-	fi
+    CONSUL_RESULT=$(systemctl --quiet status consul-server-external_new.service > /dev/null 2>&1)
+    CONSUL_STATE=$?
+    if [ $CONSUL_STATE -ne 0 ]
+    then
+        echo "consul.service fail"
+        return 1
+    else
+        echo "consul.service succ"
+        return 0
+    fi
 }
 
 check_oam_cluster_state(){
-    OAM_CLUSTER_STATE=$(systemctl status oam_cluster.service)
+    OAM_CLUSTER_RESULT=$(systemctl status oam_cluster.service > /dev/null 2>&1)
+    OAM_CLUSTER_STATE=$?
     if [ $OAM_CLUSTER_STATE -ne 0 ]
-	then
-		return 1
+    then
+        echo "oam_cluster.service fail"
+        return 1
     else
+        echo "oam_cluster.service succ"
         return 0
     fi
 }
 
 check_influxdb_state(){
-    INFLUXDB_STATE=$(systemctl status influxd.service)
-    if [ $INFLUXDB_STATE -ne 0 ]
+    INFLUXD_RESULT=$(systemctl status influxd.service > /dev/null 2>&1)
+    INFLUXD_STATE=$?
+    if [ $INFLUXD_STATE -ne 0 ]
     then
+        echo "influxd.service fail"
         return 1
     else
+        echo "influxd.service succ"
         return 0
     fi
 }
@@ -39,22 +49,22 @@ check_influxdb_state(){
 
 #return value: 0:succ; 1:error
 check_service_health(){
-    state = check_consul_state()
-    if [ $state -ne 0 ]
+    check_consul_state
+    if [ $? -ne 0 ]
     then
         echo "consul service fail!"
         return 1
     fi
 
-    state = check_oam_cluster_state()
-    if [ $state -ne 0 ]
+    check_oam_cluster_state
+    if [ $? -ne 0 ]
     then
         echo "oam_cluster service fail!"
         return 1
     fi
 
-    state = check_influxdb_state()
-    if [ $state -ne 0 ]
+    check_influxdb_state
+    if [ $? -ne 0 ]
     then
         echo "influxd service fail!"
         return 1
@@ -63,22 +73,54 @@ check_service_health(){
     return 0
 }
 
+# Restart every service whose status check fails; always returns 0.
+service_try_restart(){
+    check_consul_state
+    if [ $? -ne 0 ]
+    then
+        echo " restart consul service..."
+        TEMP_RESULT=`systemctl restart consul-server-external_new.service`
+        sleep 3
+    fi
 
-
-check_service_health()
-if [ $? ne 0]
+    check_oam_cluster_state
+    if [ $? -ne 0 ]
+    then
+        echo " restart oam_cluster service..."
+        TEMP_RESULT=`systemctl restart oam_cluster.service`
+        sleep 3
+    fi
+
+    check_influxdb_state
+    if [ $? -ne 0 ]
+    then
+        echo " restart influxd service..."
+        TEMP_RESULT=`systemctl restart influxd.service`
+        sleep 3
+    fi
+
+    return 0
+}
+
+check_service_health
+if [ $? -ne 0 ]
 then
-
+    echo "check service health fail, try restart...."
+    service_try_restart
+else
+    echo "check service health succ!"
+    exit 0
 fi
-
 
-#检查到错误且重启之后, 再检查一遍
-check_service_health()
-if [ $? ne 0]
+# After a failed check and a restart attempt, check once more.
+# Call the function by bare name: "name()" followed by "if ... fi" would
+# define a function with that body instead of running the check.
+check_service_health
+if [ $? -ne 0 ]
 then
     echo "check service health fail!"
     exit 1
 else
-	echo "check service health succ!"
+    echo "check service health succ!"
     exit 0
 fi
diff --git a/roles/tsg-common-files/tsg-cli-1.0.19.ecf9c86-1.el7.x86_64.rpm b/roles/tsg-common-files/tsg-cli-1.1.0.32ab89f-1.el7.x86_64.rpm
similarity index 91%
rename from roles/tsg-common-files/tsg-cli-1.0.19.ecf9c86-1.el7.x86_64.rpm
rename to roles/tsg-common-files/tsg-cli-1.1.0.32ab89f-1.el7.x86_64.rpm
index 57c177d..d5b405f 100644
Binary files a/roles/tsg-common-files/tsg-cli-1.0.19.ecf9c86-1.el7.x86_64.rpm and b/roles/tsg-common-files/tsg-cli-1.1.0.32ab89f-1.el7.x86_64.rpm differ