1.更新oam_core计算内存占用率阈值算法, buff/cache不算已占用;

2.修复tsg-monitor.sh清理/tmp/_MEI临时文件时影响其他模块正常运行的BUG;
This commit is contained in:
lijia
2020-11-16 16:35:21 +08:00
parent 1e23a971b6
commit d9db681492
19 changed files with 73 additions and 43 deletions

View File

@@ -52,7 +52,7 @@ oam:
alarm_app_network_major_high: "0.0001"
tsg_cli:
rpm_file_name: "tsg-cli-1.1.2.93221a7-1.el7.x86_64.rpm"
rpm_file_name: "tsg-cli-1.1.7.420d736-1.el7.x86_64.rpm"
ha_master:
instance_state: MASTER

View File

@@ -3,6 +3,7 @@ Description=oam_cli_agent daemon
[Service]
Type=simple
ExecStartPre=/usr/bin/mkdir -p /tmp/__tsg_monitor_tmp_dir
Environment=OAM_DIR=/opt/tsg/cli_agent/
ExecStart=/usr/lib/jvm/{{ java_version }}/bin/java -jar ${OAM_DIR}oam_cli_agent.jar --spring.config.location=${OAM_DIR}application.properties,${OAM_DIR}application-centos2.properties --logging.file=${OAM_DIR}oam-cli-agent.log

View File

@@ -4,6 +4,7 @@ Description=oam_cli_agent daemon
[Service]
Type=simple
Environment=OAM_DIR=/opt/tsg/cli_agent/
ExecStartPre=/usr/bin/mkdir -p /tmp/__tsg_monitor_tmp_dir
ExecStart=/usr/lib/jvm/{{ java_version }}/bin/java -jar ${OAM_DIR}oam_cli_agent.jar --spring.config.location=${OAM_DIR}application.properties,${OAM_DIR}application-centos2.properties --logging.file=${OAM_DIR}oam-cli-agent.log
[Install]

View File

@@ -4,7 +4,7 @@ Description=oam_core daemon
[Service]
Type=simple
Environment=OAM_DIR=/opt/tsg/oam_core/
ExecStart=/usr/lib/jvm/{{ java_version }}/bin/java -jar ${OAM_DIR}oam_core.jar --spring.config.location=${OAM_DIR}application.properties,${OAM_DIR}application-centos2.properties --logging.file=${OAM_DIR}oam-core.log
ExecStart=/usr/lib/jvm/{{ java_version }}/bin/java -jar ${OAM_DIR}oam-core.jar --spring.config.location=${OAM_DIR}application.properties,${OAM_DIR}application-centos2.properties --logging.file=${OAM_DIR}oam-core.log
[Install]
WantedBy=multi-user.target

View File

@@ -13,31 +13,22 @@ killall_uncompleted_cmd(){
killall -9 -q tsg_monit_app
killall -9 -q tsg_monit_intercept
killall -9 -q tsg_monit_interface
#killall -9 -q tsg_monit_protocol_v3
killall -9 -q tsg_monit_protocol_v4
#killall -9 -q tsg_monit_stream_v3
killall -9 -q tsg_monit_stream_v4
cmd_run=`ps -ef | grep tsg_ | grep -v grep`
if [ ${#cmd_run} -lt 1 ] ; then
#在没有其他tsg_xxx命令正在执行时, 删除执行命令错误被强制kill时, python生成的临时文件
rm -rf /tmp/_MEI*
fi
/opt/tsg/tsg-monitor/tsg_monitor_cleanup_tmp.sh
}
# Start the tsg-monitor helper commands as background jobs, discarding their
# standard output.
start_background_cmd(){
# Make sure the monitor temp directory exists before anything is launched.
mkdir -p /tmp/__tsg_monitor_tmp_dir
# Run the commands concurrently in the background so that they all start at
# roughly the same time,
# and so that one command suffering network congestion or a long run time cannot block the while(1) main loop.
/opt/tsg/tsg-monitor/tsg_diagnose_background > /dev/null &
#/opt/tsg/tsg-monitor/tsg_update_tags > /dev/null &
/opt/tsg/tsg-monitor/tsg_monit_app > /dev/null &
/opt/tsg/tsg-monitor/tsg_monit_intercept > /dev/null &
/opt/tsg/tsg-monitor/tsg_monit_interface > /dev/null &
#/opt/tsg/tsg-monitor/tsg_monit_protocol_v3 > /dev/null &
/opt/tsg/tsg-monitor/tsg_monit_protocol_v4 > /dev/null &
#/opt/tsg/tsg-monitor/tsg_monit_stream_v3 > /dev/null &
/opt/tsg/tsg-monitor/tsg_monit_stream_v4 > /dev/null &
}

View File

@@ -57,6 +57,12 @@
src: "{{ role_path }}/files/rsyslog.conf"
dest: "/etc/"
- name: "copy tsg_monitor_cleanup_tmp.sh to destination server"
synchronize:
src: "{{ role_path }}/../tsg-common-files/tsg_monitor_cleanup_tmp.sh"
dest: "/opt/tsg/tsg-monitor/"
- name: "restart rsyslog service"
systemd:
name: rsyslog

View File

@@ -12,14 +12,11 @@ killall_uncompleted_cmd(){
killall -9 -q tsg_monit_interface
killall -9 -q tsg_monit_intercept
cmd_run=`ps -ef | grep tsg_ | grep -v grep`
if [ ${#cmd_run} -lt 1 ] ; then
#在没有其他tsg_xxx命令正在执行时, 删除执行命令错误被强制kill时, python生成的临时文件
rm -rf /tmp/_MEI*
fi
/opt/tsg/tsg-monitor/tsg_monitor_cleanup_tmp.sh
}
start_background_cmd(){
mkdir -p /tmp/__tsg_monitor_tmp_dir
#后台并发运行, 保证所有命令的开始运行时间基本一样,
#且不会因某个命令网络拥塞、执行时间长等问题阻塞while(1)主循环
/opt/tsg/tsg-monitor/tsg_diagnose_background > /dev/null &

View File

@@ -57,6 +57,11 @@
src: "{{ role_path }}/files/rsyslog.conf"
dest: "/etc/"
- name: "copy tsg_monitor_cleanup_tmp.sh to destination server"
synchronize:
src: "{{ role_path }}/../tsg-common-files/tsg_monitor_cleanup_tmp.sh"
dest: "/opt/tsg/tsg-monitor/"
- name: "restart rsyslog service"
systemd:
name: rsyslog

View File

@@ -12,14 +12,11 @@ killall_uncompleted_cmd(){
killall -9 -q tsg_monit_interface
killall -9 -q tsg_monit_intercept
cmd_run=`ps -ef | grep tsg_ | grep -v grep`
if [ ${#cmd_run} -lt 1 ] ; then
#在没有其他tsg_xxx命令正在执行时, 删除执行命令错误被强制kill时, python生成的临时文件
rm -rf /tmp/_MEI*
fi
/opt/tsg/tsg-monitor/tsg_monitor_cleanup_tmp.sh
}
start_background_cmd(){
mkdir -p /tmp/__tsg_monitor_tmp_dir
#后台并发运行, 保证所有命令的开始运行时间基本一样,
#且不会因某个命令网络拥塞、执行时间长等问题阻塞while(1)主循环
/opt/tsg/tsg-monitor/tsg_diagnose_background > /dev/null &

View File

@@ -57,6 +57,11 @@
src: "{{ role_path }}/files/rsyslog.conf"
dest: "/etc/"
- name: "copy tsg_monitor_cleanup_tmp.sh to destination server"
synchronize:
src: "{{ role_path }}/../tsg-common-files/tsg_monitor_cleanup_tmp.sh"
dest: "/opt/tsg/tsg-monitor/"
- name: "restart rsyslog service"
systemd:
name: rsyslog

View File

@@ -12,14 +12,11 @@ killall_uncompleted_cmd(){
killall -9 -q tsg_monit_interface
killall -9 -q tsg_monit_intercept
cmd_run=`ps -ef | grep tsg_ | grep -v grep`
if [ ${#cmd_run} -lt 1 ] ; then
#在没有其他tsg_xxx命令正在执行时, 删除执行命令错误被强制kill时, python生成的临时文件
rm -rf /tmp/_MEI*
fi
/opt/tsg/tsg-monitor/tsg_monitor_cleanup_tmp.sh
}
start_background_cmd(){
mkdir -p /tmp/__tsg_monitor_tmp_dir
#后台并发运行, 保证所有命令的开始运行时间基本一样,
#且不会因某个命令网络拥塞、执行时间长等问题阻塞while(1)主循环
/opt/tsg/tsg-monitor/tsg_diagnose_background > /dev/null &

View File

@@ -57,6 +57,11 @@
src: "{{ role_path }}/files/rsyslog.conf"
dest: "/etc/"
- name: "copy tsg_monitor_cleanup_tmp.sh to destination server"
synchronize:
src: "{{ role_path }}/../tsg-common-files/tsg_monitor_cleanup_tmp.sh"
dest: "/opt/tsg/tsg-monitor/"
- name: "restart rsyslog service"
systemd:
name: rsyslog

View File

@@ -8,21 +8,17 @@
#导致10秒内还不结束, 要强行杀掉,
#否则长时间运行后, 会有大量后台进程运行!
killall_uncompleted_cmd(){
killall -9 -q tsg_cluster_register
#killall -9 -q tsg_cluster_register
#killall -9 -q tsg_diagnose_background
#killall -9 -q tsg_update_tags
cmd_run=`ps -ef | grep tsg_ | grep -v grep`
if [ ${#cmd_run} -lt 1 ] ; then
#在没有其他tsg_xxx命令正在执行时, 删除执行命令错误被强制kill时, python生成的临时文件
rm -rf /tmp/_MEI*
fi
/opt/tsg/tsg-monitor/tsg_monitor_cleanup_tmp.sh
}
start_background_cmd(){
mkdir -p /tmp/__tsg_monitor_tmp_dir
#后台并发运行, 保证所有命令的开始运行时间基本一样,
#且不会因某个命令网络拥塞、执行时间长等问题阻塞while(1)主循环
/opt/tsg/tsg-monitor/tsg_cluster_register > /dev/null &
#/opt/tsg/tsg-monitor/tsg_cluster_register > /dev/null &
#mxn板只检测cpu, mem, disk等, 前台cli命令启用diagnose,
#后台服务依靠oam snmp模块, 无需运行tsg_diagnose_background

View File

@@ -4,6 +4,7 @@ Requires=consul-client.service
After=consul-client.service
[Service]
ExecStartPre=/usr/bin/mkdir -p /tmp/__tsg_monitor_tmp_dir
ExecStart=/opt/tsg/tsg-monitor/tsg_cluster_register
Type=oneshot
RemainAfterExit=yes

View File

@@ -74,29 +74,44 @@
dest: "/opt/tsg/etc/tsg_series.json"
tags: template
#2020-11-13 lijia modify
- name: "copy tsg-monitor.service to destination server"
synchronize:
src: "{{ role_path }}/files/tsg-monitor.service"
dest: "/usr/lib/systemd/system"
#2020-11-13 lijia add
- name: "copy tsg_cluster_register.service to destination server"
synchronize:
src: "{{ role_path }}/files/tsg_cluster_register.service"
dest: "/usr/lib/systemd/system"
# 2020-11-13 lijia close
#- name: "copy tsg-monitor.sh to destination server"
# copy:
# src: "{{ role_path }}/files/tsg-monitor.sh"
# dest: "/opt/tsg/tsg-monitor/"
# mode: 0755
- name: "copy tsg-monitor.sh to destination server"
copy:
src: "{{ role_path }}/files/tsg-monitor.sh"
dest: "/opt/tsg/tsg-monitor/"
mode: 0755
- name: "copy rsyslog.conf to destination server"
synchronize:
src: "{{ role_path }}/files/rsyslog.conf"
dest: "/etc/"
- name: "copy tsg_monitor_cleanup_tmp.sh to destination server"
synchronize:
src: "{{ role_path }}/../tsg-common-files/tsg_monitor_cleanup_tmp.sh"
dest: "/opt/tsg/tsg-monitor/"
- name: "restart rsyslog service"
systemd:
name: rsyslog
state: restarted
- name: "enable tsg-monitor service"
systemd:
name: tsg-monitor
enabled: yes
daemon_reload: yes
state: restarted
- name: "enable tsg_cluster_register.service"
systemd:

View File

@@ -0,0 +1,13 @@
#!/bin/sh
# Remove old "_MEI*" directories from the tsg-monitor temporary directory.
#
# Only directories below /tmp/__tsg_monitor_tmp_dir are considered, and only
# those that satisfy find's "-ctime +1" test, so recently changed directories
# are left in place.
#
# Exit status: 1 when the temporary directory cannot be entered; otherwise
# the exit status of find.

# The find below uses a relative start point, so it must never run from any
# other working directory: stop right away if the cd fails.
cd /tmp/__tsg_monitor_tmp_dir || exit 1

# -prune stops find from descending into a directory that is about to be
# deleted. "-exec ... {} +" passes every match to rm as one intact argument,
# so a name containing whitespace or quotes cannot be split into different
# paths the way it is when the list is piped through xargs.
find . -type d -name "_MEI*" -ctime +1 -prune -exec rm -rf -- {} +