8 Commits

Author SHA1 Message Date
lijia
285e302374 更新oam_cli配置文件, 端口改为1122. 2020-11-21 20:01:34 +08:00
lijia
3b9b3c3c04 更新threshold.sql. 2020-11-21 18:01:15 +08:00
lijia
709ba7a80e 修改了一些bug,因涉及文件较大,使用ansible增量更新文件,而不是手工pscp,pssh方式. 2020-11-21 17:17:50 +08:00
lijia
f15cb784c6 更新tsg-cli-env.ini阈值, 与tsg_threshold.sql一致 2020-11-20 22:59:06 +08:00
lijia
ae637dc7e2 更新tsg-cli RPM包名称. 2020-11-20 21:22:15 +08:00
lijia
e9c4747c57 更新绑定端口880->1180, 822->1122. 2020-11-17 21:19:20 +08:00
lijia
8694b7025f 更新oam_core.rpm打包的oam_core.jar文件名称,使用下划线'_'分割. 2020-11-16 17:01:37 +08:00
lijia
d9db681492 1.更新oam_core计算内存占用率阈值算法, buff/cache不算已占用;
2.更新tsg-monitor.sh清理/tmp/_MEI临时文件,影响其他模块正常运行的BUG;
2020-11-16 16:35:21 +08:00
24 changed files with 217 additions and 104 deletions

31
20201121_inc_update.yml Normal file
View File

@@ -0,0 +1,31 @@
---
# Incremental update playbook: applies the listed roles to each blade group.
# Plays execute in file order, top to bottom.

# MXN blade: CLI package, MXN CLI agent and OAM core.
- hosts: blade-mxn
  roles:
    - tsg-cli-mxn
    - oam_cli_agent_mxn
    - oam_core

# MCN0 is the only MCN blade that also gets the setup_mariadb role.
- hosts: blade-mcn0
  roles:
    - tsg-cli-mcn0
    - setup_mariadb
    - oam_cli_agent

- hosts: blade-mcn1
  roles:
    - tsg-cli-mcn1
    - oam_cli_agent

- hosts: blade-mcn2
  roles:
    - tsg-cli-mcn2
    - oam_cli_agent

- hosts: blade-mcn3
  roles:
    - tsg-cli-mcn3
    - oam_cli_agent

# Second blade-mxn play: oam_mxn_start runs only after all MCN plays above.
- hosts: blade-mxn
  roles:
    - oam_mxn_start

View File

@@ -34,25 +34,25 @@ mariadb:
oam:
cluster_port: 880
core_port: 880
cli_agent_port: 822
core_port: 1180
cli_agent_port: 1122
snmp_port: 1611
snmptrapd_ip: "127.0.0.1"
snmptrapd_port: 164
mcn0_ip: "192.168.100.1"
alarm_cpu_critical_high: 90
alarm_cpu_major_high: 80
alarm_cpu_major_high: 85
alarm_mem_critical_high: 90
alarm_mem_major_high: 80
alarm_mem_major_high: 85
alarm_disk_critical_high: 90
alarm_disk_major_high: 80
alarm_interface_critical_high: "0.0001"
alarm_interface_major_high: "0.00005"
alarm_app_network_critical_high: "0.0001"
alarm_disk_major_high: 85
alarm_interface_critical_high: "0.0005"
alarm_interface_major_high: "0.0001"
alarm_app_network_critical_high: "0.0005"
alarm_app_network_major_high: "0.0001"
tsg_cli:
rpm_file_name: "tsg-cli-1.1.2.93221a7-1.el7.x86_64.rpm"
rpm_file_name: "tsg-cli-1.1.8.dc13c5d-1.el7.x86_64.rpm"
ha_master:
instance_state: MASTER

View File

@@ -1,39 +1,13 @@
- name: "copy oam_cli_agent.rpm to destination server"
copy:
src: "{{ role_path }}/files/"
dest: /tmp
- name: "install oam_cli_agent"
yum:
name:
- /tmp/oam_cli_agent-1.0-1.0.x86_64.rpm
state: present
- name: "bak snmpd.conf"
shell: cp -rf /usr/share/snmp/snmpd.conf /usr/share/snmp/snmpd.conf_origin
ignore_errors: yes
- name: "Templates snmpd.conf"
template:
src: "{{role_path}}/templates/snmpd.conf"
dest: /usr/share/snmp/snmpd.conf
tags: template
- name: "Templates oam_cli_agent.service"
template:
src: "{{ role_path }}/templates/oam_cli_agent.service.j2"
dest: /usr/lib/systemd/system/oam_cli_agent.service
- name: "Templates application.properties.j2"
template:
src: "{{ role_path }}/templates/application.properties.j2"
dest: /opt/tsg/cli_agent/application.properties
- name: "Start snmpd"
systemd:
name: snmpd
state: restarted
enabled: yes
- name: "Templates oam_cli_agent.service"
template:
src: "{{ role_path }}/templates/oam_cli_agent.service.j2"
dest: /usr/lib/systemd/system/oam_cli_agent.service
- name: "Start oam_cli_agent"
systemd:

View File

@@ -3,6 +3,7 @@ Description=oam_cli_agent daemon
[Service]
Type=simple
ExecStartPre=/usr/bin/mkdir -p /tmp/__tsg_monitor_tmp_dir
Environment=OAM_DIR=/opt/tsg/cli_agent/
ExecStart=/usr/lib/jvm/{{ java_version }}/bin/java -jar ${OAM_DIR}oam_cli_agent.jar --spring.config.location=${OAM_DIR}application.properties,${OAM_DIR}application-centos2.properties --logging.file=${OAM_DIR}oam-cli-agent.log

View File

@@ -1,27 +1,13 @@
- name: "copy oam_cli_agent_mxn.rpm to destination server"
copy:
src: "{{ role_path }}/files/"
dest: /tmp
- name: "install oam_cli_agent_mxn"
yum:
name:
- /tmp/oam_cli_agent_mxn-1.0-1.0.x86_64.rpm
state: present
- name: "Templates oam_cli_agent_mxn.service"
template:
src: "{{ role_path }}/templates/oam_cli_agent_mxn.service.j2"
dest: /usr/lib/systemd/system/oam_cli_agent_mxn.service
- name: "Templates application.properties.j2"
template:
src: "{{ role_path }}/templates/application.properties.j2"
dest: /opt/tsg/cli_agent/application.properties
#- name: "Start oam_cli_agent_mxn"
# systemd:
# name: oam_cli_agent_mxn
# state: restarted
# enabled: yes
# daemon_reload: yes
- name: "Start oam_cli_agent_mxn"
systemd:
name: oam_cli_agent_mxn
state: restarted
enabled: yes
daemon_reload: yes

View File

@@ -4,6 +4,7 @@ Description=oam_cli_agent daemon
[Service]
Type=simple
Environment=OAM_DIR=/opt/tsg/cli_agent/
ExecStartPre=/usr/bin/mkdir -p /tmp/__tsg_monitor_tmp_dir
ExecStart=/usr/lib/jvm/{{ java_version }}/bin/java -jar ${OAM_DIR}oam_cli_agent.jar --spring.config.location=${OAM_DIR}application.properties,${OAM_DIR}application-centos2.properties --logging.file=${OAM_DIR}oam-cli-agent.log
[Install]

View File

@@ -1,10 +1,10 @@
- name: "judge tsg_oam"
shell: mysql -uroot -p111111 -e "show databases;" |grep tsg_oam
shell: mysql -u {{ mariadb.username }} -p{{ mariadb.password }} -e "show databases;" |grep tsg_oam
register: return
ignore_errors: true
- name: "set tsg_oam database"
shell: mysql -uroot -p{{ mariadb.password }} -e "create database tsg_oam"
#- name: "set tsg_oam database"
# shell: mysql -uroot -p{{ mariadb.password }} -e "create database tsg_oam"
#when: return.rc != 0
- name: "Templates tsg_threshold.sql"

View File

@@ -13,31 +13,22 @@ killall_uncompleted_cmd(){
killall -9 -q tsg_monit_app
killall -9 -q tsg_monit_intercept
killall -9 -q tsg_monit_interface
#killall -9 -q tsg_monit_protocol_v3
killall -9 -q tsg_monit_protocol_v4
#killall -9 -q tsg_monit_stream_v3
killall -9 -q tsg_monit_stream_v4
cmd_run=`ps -ef | grep tsg_ | grep -v grep`
if [ ${#cmd_run} -lt 1 ] ; then
#在没有其他tsg_xxx命令正在执行时, 删除执行命令错误被强制kill时, python生成的临时文件
rm -rf /tmp/_MEI*
fi
/opt/tsg/tsg-monitor/tsg_monitor_cleanup_tmp.sh
}
# Launch each enabled monitoring command as a background job, discarding its
# stdout, and return without waiting for any of them.
start_background_cmd(){
# Create the tsg-monitor temp directory if it does not exist yet.
mkdir -p /tmp/__tsg_monitor_tmp_dir
# Run concurrently in the background so that all commands start at roughly the same time,
# and so that one command delayed by network congestion or a long run time cannot block the while(1) main loop.
/opt/tsg/tsg-monitor/tsg_diagnose_background > /dev/null &
#/opt/tsg/tsg-monitor/tsg_update_tags > /dev/null &
/opt/tsg/tsg-monitor/tsg_monit_app > /dev/null &
/opt/tsg/tsg-monitor/tsg_monit_intercept > /dev/null &
/opt/tsg/tsg-monitor/tsg_monit_interface > /dev/null &
#/opt/tsg/tsg-monitor/tsg_monit_protocol_v3 > /dev/null &
/opt/tsg/tsg-monitor/tsg_monit_protocol_v4 > /dev/null &
#/opt/tsg/tsg-monitor/tsg_monit_stream_v3 > /dev/null &
/opt/tsg/tsg-monitor/tsg_monit_stream_v4 > /dev/null &
}

View File

@@ -57,6 +57,12 @@
src: "{{ role_path }}/files/rsyslog.conf"
dest: "/etc/"
- name: "copy tsg_monitor_cleanup_tmp.sh to destination server"
synchronize:
src: "{{ role_path }}/../tsg-common-files/tsg_monitor_cleanup_tmp.sh"
dest: "/opt/tsg/tsg-monitor/"
- name: "restart rsyslog service"
systemd:
name: rsyslog

View File

@@ -12,14 +12,11 @@ killall_uncompleted_cmd(){
killall -9 -q tsg_monit_interface
killall -9 -q tsg_monit_intercept
cmd_run=`ps -ef | grep tsg_ | grep -v grep`
if [ ${#cmd_run} -lt 1 ] ; then
#在没有其他tsg_xxx命令正在执行时, 删除执行命令错误被强制kill时, python生成的临时文件
rm -rf /tmp/_MEI*
fi
/opt/tsg/tsg-monitor/tsg_monitor_cleanup_tmp.sh
}
start_background_cmd(){
mkdir -p /tmp/__tsg_monitor_tmp_dir
#后台并发运行, 保证所有命令的开始运行时间基本一样,
#且不会因某个命令网络拥塞、执行时间长等问题阻塞while(1)主循环
/opt/tsg/tsg-monitor/tsg_diagnose_background > /dev/null &

View File

@@ -57,6 +57,11 @@
src: "{{ role_path }}/files/rsyslog.conf"
dest: "/etc/"
- name: "copy tsg_monitor_cleanup_tmp.sh to destination server"
synchronize:
src: "{{ role_path }}/../tsg-common-files/tsg_monitor_cleanup_tmp.sh"
dest: "/opt/tsg/tsg-monitor/"
- name: "restart rsyslog service"
systemd:
name: rsyslog

View File

@@ -12,14 +12,11 @@ killall_uncompleted_cmd(){
killall -9 -q tsg_monit_interface
killall -9 -q tsg_monit_intercept
cmd_run=`ps -ef | grep tsg_ | grep -v grep`
if [ ${#cmd_run} -lt 1 ] ; then
#在没有其他tsg_xxx命令正在执行时, 删除执行命令错误被强制kill时, python生成的临时文件
rm -rf /tmp/_MEI*
fi
/opt/tsg/tsg-monitor/tsg_monitor_cleanup_tmp.sh
}
start_background_cmd(){
mkdir -p /tmp/__tsg_monitor_tmp_dir
#后台并发运行, 保证所有命令的开始运行时间基本一样,
#且不会因某个命令网络拥塞、执行时间长等问题阻塞while(1)主循环
/opt/tsg/tsg-monitor/tsg_diagnose_background > /dev/null &

View File

@@ -57,6 +57,11 @@
src: "{{ role_path }}/files/rsyslog.conf"
dest: "/etc/"
- name: "copy tsg_monitor_cleanup_tmp.sh to destination server"
synchronize:
src: "{{ role_path }}/../tsg-common-files/tsg_monitor_cleanup_tmp.sh"
dest: "/opt/tsg/tsg-monitor/"
- name: "restart rsyslog service"
systemd:
name: rsyslog

View File

@@ -12,14 +12,11 @@ killall_uncompleted_cmd(){
killall -9 -q tsg_monit_interface
killall -9 -q tsg_monit_intercept
cmd_run=`ps -ef | grep tsg_ | grep -v grep`
if [ ${#cmd_run} -lt 1 ] ; then
#在没有其他tsg_xxx命令正在执行时, 删除执行命令错误被强制kill时, python生成的临时文件
rm -rf /tmp/_MEI*
fi
/opt/tsg/tsg-monitor/tsg_monitor_cleanup_tmp.sh
}
start_background_cmd(){
mkdir -p /tmp/__tsg_monitor_tmp_dir
#后台并发运行, 保证所有命令的开始运行时间基本一样,
#且不会因某个命令网络拥塞、执行时间长等问题阻塞while(1)主循环
/opt/tsg/tsg-monitor/tsg_diagnose_background > /dev/null &

View File

@@ -57,6 +57,11 @@
src: "{{ role_path }}/files/rsyslog.conf"
dest: "/etc/"
- name: "copy tsg_monitor_cleanup_tmp.sh to destination server"
synchronize:
src: "{{ role_path }}/../tsg-common-files/tsg_monitor_cleanup_tmp.sh"
dest: "/opt/tsg/tsg-monitor/"
- name: "restart rsyslog service"
systemd:
name: rsyslog

View File

@@ -8,21 +8,17 @@
#导致10秒内还不结束, 要强行杀掉,
#否则长时间运行后, 会有大量后台进程运行!
killall_uncompleted_cmd(){
killall -9 -q tsg_cluster_register
#killall -9 -q tsg_cluster_register
#killall -9 -q tsg_diagnose_background
#killall -9 -q tsg_update_tags
cmd_run=`ps -ef | grep tsg_ | grep -v grep`
if [ ${#cmd_run} -lt 1 ] ; then
#在没有其他tsg_xxx命令正在执行时, 删除执行命令错误被强制kill时, python生成的临时文件
rm -rf /tmp/_MEI*
fi
/opt/tsg/tsg-monitor/tsg_monitor_cleanup_tmp.sh
}
start_background_cmd(){
mkdir -p /tmp/__tsg_monitor_tmp_dir
#后台并发运行, 保证所有命令的开始运行时间基本一样,
#且不会因某个命令网络拥塞、执行时间长等问题阻塞while(1)主循环
/opt/tsg/tsg-monitor/tsg_cluster_register > /dev/null &
#/opt/tsg/tsg-monitor/tsg_cluster_register > /dev/null &
#mxn板只检测cpu, mem, disk等, 前台cli命令启用diagnose,
#后台服务依靠oam snmp模块, 无需运行tsg_diagnose_background

View File

@@ -4,6 +4,7 @@ Requires=consul-client.service
After=consul-client.service
[Service]
ExecStartPre=/usr/bin/mkdir -p /tmp/__tsg_monitor_tmp_dir
ExecStart=/opt/tsg/tsg-monitor/tsg_cluster_register
Type=oneshot
RemainAfterExit=yes

View File

@@ -74,29 +74,44 @@
dest: "/opt/tsg/etc/tsg_series.json"
tags: template
#2020-11-13 lijia modify
- name: "copy tsg-monitor.service to destination server"
synchronize:
src: "{{ role_path }}/files/tsg-monitor.service"
dest: "/usr/lib/systemd/system"
#2020-11-13 lijia add
- name: "copy tsg_cluster_register.service to destination server"
synchronize:
src: "{{ role_path }}/files/tsg_cluster_register.service"
dest: "/usr/lib/systemd/system"
# 2020-11-13 lijia close
#- name: "copy tsg-monitor.sh to destination server"
# copy:
# src: "{{ role_path }}/files/tsg-monitor.sh"
# dest: "/opt/tsg/tsg-monitor/"
# mode: 0755
- name: "copy tsg-monitor.sh to destination server"
copy:
src: "{{ role_path }}/files/tsg-monitor.sh"
dest: "/opt/tsg/tsg-monitor/"
mode: 0755
- name: "copy rsyslog.conf to destination server"
synchronize:
src: "{{ role_path }}/files/rsyslog.conf"
dest: "/etc/"
- name: "copy tsg_monitor_cleanup_tmp.sh to destination server"
synchronize:
src: "{{ role_path }}/../tsg-common-files/tsg_monitor_cleanup_tmp.sh"
dest: "/opt/tsg/tsg-monitor/"
- name: "restart rsyslog service"
systemd:
name: rsyslog
state: restarted
- name: "enable tsg-monitor service"
systemd:
name: tsg-monitor
enabled: yes
daemon_reload: yes
state: restarted
- name: "enable tsg_cluster_register.service"
systemd:

View File

@@ -0,0 +1,13 @@
#!/bin/sh
# Delete stale "_MEI*" temporary directories from the tsg-monitor temp
# directory. Only directories selected by `find -ctime +1` (status changed
# more than one day ago) are removed; newer ones are left in place.
#
# Exit status: 1 if the temp directory cannot be entered, otherwise the exit
# status of find.

TMP_DIR=/tmp/__tsg_monitor_tmp_dir

# Abort instead of running the recursive delete from an unexpected directory.
cd "$TMP_DIR" || exit 1

# -prune keeps find from descending into a directory that is about to be
# removed. -exec ... {} + hands the names to rm without going through word
# splitting, and does not invoke rm at all when nothing matches.
find . -ctime +1 -type d -name "_MEI*" -prune -exec rm -rf {} +

79
uninstall.yml Normal file
View File

@@ -0,0 +1,79 @@
---
# Uninstall playbook: stops the OAM/TSG services and removes their packages.
#
# Every "stop" task sets ignore_errors so the play continues when a service
# cannot be stopped (for example because it is not installed on that host).

# Play 1 - MXN blade: stop all OAM services first, then remove the packages.
- hosts: blade-mxn
  tasks:
    - name: "stop oam_core.service"
      service:
        name: oam_core
        state: stopped
      ignore_errors: true

    - name: "stop oam_snmp service"
      service:
        name: oam_snmp
        state: stopped
      ignore_errors: true

    - name: "stop oam_cli service"
      service:
        name: oam_cli
        state: stopped
      ignore_errors: true

    - name: "stop oam_cli_agent_mxn service"
      service:
        name: oam_cli_agent_mxn
        state: stopped
      ignore_errors: true

    # This service is only stopped here; no task in this play removes it.
    - name: "stop consul-server-external service"
      service:
        name: consul-server-external
        state: stopped
      ignore_errors: true

    - name: "uninstall oam_core"
      yum:
        name: "oam_core"
        state: absent

    - name: "uninstall oam_cli"
      yum:
        name: "oam_cli"
        state: absent

    - name: "uninstall oam_snmp"
      yum:
        name: "oam_snmp"
        state: absent

    - name: "uninstall oam_cli_agent_mxn"
      yum:
        name: "oam_cli_agent_mxn"
        state: absent

    - name: "uninstall tsg-cli"
      yum:
        name: "tsg-cli"
        state: absent

# Play 2 - every host: stop the monitor and CLI agent, then remove packages.
- hosts: all
  tasks:
    - name: "stop tsg-monitor service"
      service:
        name: tsg-monitor
        state: stopped
      ignore_errors: true

    # Stop the agent before its package is removed, matching how play 1
    # handles every other service. Failure is ignored on hosts that do not
    # run this service.
    - name: "stop oam_cli_agent service"
      service:
        name: oam_cli_agent
        state: stopped
      ignore_errors: true

    - name: "uninstall tsg-cli"
      yum:
        name: "tsg-cli"
        state: absent

    - name: "uninstall oam_cli_agent"
      yum:
        name: "oam_cli_agent"
        state: absent

13
uninstall_oam_core.yml Normal file
View File

@@ -0,0 +1,13 @@
---
# Removes only the oam_core package from the MXN blade.
- hosts: blade-mxn
  tasks:
    # ignore_errors lets the play continue to the package removal even when
    # the service cannot be stopped (for example because it is not installed).
    - name: "stop oam_core.service"
      service:
        name: oam_core
        state: stopped
      ignore_errors: true

    - name: "uninstall oam_core"
      yum:
        name: "oam_core"
        state: absent