Add cluster-server HA configuration; fix the bug where dependency libraries cannot be found after packaging with PyInstaller.

This commit is contained in:
lijia
2020-10-20 22:46:23 +08:00
parent 2344df69ee
commit 25e4f39bcc
47 changed files with 679 additions and 30 deletions
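For the PyInstaller dependency fix mentioned in the commit message, a hedged sketch of the usual remedy — declaring hidden imports so libraries missed by static analysis get bundled — with hypothetical module and entry-script names:

# Hypothetical illustration only: module and script names are assumptions.
pyinstaller --onefile \
  --hidden-import influxdb \
  --hidden-import pymysql \
  oam_cli_agent.py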

41
adc166_170.deploy.yml Normal file
View File

@@ -0,0 +1,41 @@
- hosts: blade-mxn
roles:
- consul-external
- tsg-cli-mxn
- oam_cli_agent_mxn
- oam_cli
- oam_core
- oam_snmp
- hosts: blade-mcn0
roles:
#- consul-internal_mcn01
- telegraf
- influxdb
- grafana
- tsg-cli-mcn0
#- mariadb
- oam_cli_agent
- hosts: blade-mcn1
roles:
#- consul-internal_mcn01
- tsg-cli-mcn1
- oam_cli_agent
- hosts: blade-mcn2
roles:
#- consul-internal_mcn23
- tsg-cli-mcn2
- oam_cli_agent
- hosts: blade-mcn3
roles:
#- consul-internal_mcn23
- tsg-cli-mcn3
- oam_cli_agent
- hosts: blade-mxn
roles:
- oam_mxn_start
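This playbook would typically be run against the matching inventory; a minimal sketch, assuming the inventory file is named adc166_170.hosts (an assumption, the name is not shown in this view):

# Dry-run first, then apply; the inventory filename is assumed.
ansible-playbook -i adc166_170.hosts adc166_170.deploy.yml --check
ansible-playbook -i adc166_170.hosts adc166_170.deploy.yml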

View File

@@ -1,12 +1,6 @@
- hosts: all
roles:
#- jdk1.8.0_73
- hosts: cluster_server
roles:
#- consul-cluster
#- mariadb
#- influxdb
- jdk1.8.0_73
- hosts: blade-mxn
roles:
@@ -20,7 +14,7 @@
- hosts: blade-mcn0
roles:
- consul-internal_mcn01
#- consul-internal_mcn01
- telegraf
- influxdb
- grafana
@@ -30,19 +24,19 @@
- hosts: blade-mcn1
roles:
- consul-internal_mcn01
#- consul-internal_mcn01
- tsg-cli-mcn1
- oam_cli_agent
- hosts: blade-mcn2
roles:
- consul-internal_mcn23
#- consul-internal_mcn23
- tsg-cli-mcn2
- oam_cli_agent
- hosts: blade-mcn3
roles:
- consul-internal_mcn23
#- consul-internal_mcn23
- tsg-cli-mcn3
- oam_cli_agent
@@ -53,3 +47,17 @@
- hosts: cluster_server
roles:
- oam_cluster
- consul-cluster
# mariadb reuses the Bifang instance
#- mariadb
- influxdb
- ha_master
- hosts: cluster_server_backup
roles:
- oam_cluster
- consul-cluster-backup
# mariadb reuses the Bifang instance
#- mariadb
- influxdb
- ha_backup

View File

@@ -0,0 +1,24 @@
[all:vars]
ansible_user=root
use_chassis_hardware_sn=true
[cluster_server]
192.168.44.56 influxdb_cluster=true
[cluster_server_backup]
192.168.44.57 influxdb_cluster=true
[blade-mxn]
192.168.40.170 tag_location=Almaty
[blade-mcn0]
192.168.40.166 tag_location=Almaty
[blade-mcn1]
192.168.40.167 tag_location=Almaty
[blade-mcn2]
192.168.40.168 tag_location=Almaty
[blade-mcn3]
192.168.40.169 tag_location=Almaty

View File

@@ -2,7 +2,8 @@ grafana:
http_port: 53000
influxdb:
cluster_ip: "10.3.60.8"
cluster_ip: "192.168.44.56"
cluster_ip_backup: "192.168.44.57"
dbname: tsg_stat
username: admin
passwd: tsg2019
@@ -13,7 +14,8 @@ influxdb:
consul:
datacenter: consul-yz
dckey: "XwXLAbVN1C44dLUVJ6UL5A=="
cluster_ip: "10.3.60.8"
cluster_ip: "192.168.44.56"
cluster_ip_backup: "192.168.44.57"
cluster_ethname: em1
external_ethname: enp2s0
mcn01_internal_ethname: ens1f3
@@ -22,15 +24,26 @@ consul:
mariadb:
username: root
password: 111111
cluster_ip: "10.3.60.7"
cluster_ip: "192.168.40.210"
oam:
snmptrapd_ip: "127.0.0.1"
snmptrapd_port: 162
mcn0_ip: "192.168.100.1"
tsg_cli:
rpm_file_name: "tsg-cli-1.0.17.19ebf72-1.el7.x86_64.rpm"
rpm_file_name: "tsg-cli-1.0.18.c5fdb96-1.el7.x86_64.rpm"
ha_master:
instance_state: MASTER
ethname: eth0
virtual_ip: "192.168.44.58"
ha_backup:
instance_state: BACKUP
ethname: eth0
virtual_ip: "192.168.44.58"
java_version: jdk1.8.0_73
bifang_api_ip: "10.3.60.7"
bifang_api_ip: "192.168.44.3"
bifang_api_port: 8080

View File

@@ -3,7 +3,10 @@ ansible_user=root
use_chassis_hardware_sn=false
[cluster_server]
127.0.0.1 influxdb_cluster=true
192.168.44.56 influxdb_cluster=true
[cluster_server_backup]
192.168.44.57 influxdb_cluster=true
[blade-mxn]
192.168.40.25 SN=CBT2201925000002 tag_location=xinxigang

View File

@@ -0,0 +1,6 @@
#!/bin/bash
#
killall -9 consul-replicate
cd /opt/consul-internal/bin/;./consul-replicate -prefix "device_info@consul-external_new" &>/dev/null &
cd /opt/consul-internal/bin/;./consul-replicate -prefix "device_list@consul-external_new" &>/dev/null &
cd /opt/consul-internal/bin/;./consul-replicate -prefix "tags@consul-external_new" &>/dev/null &

View File

@@ -0,0 +1,12 @@
{
"server" : true,
"datacenter" : "consul-ADC01",
"data_dir" : "/var/consul-external",
"encrypt" : "h1fHoHnJ+n+764ObqTNVjw==",
"disable_update_check" : true,
"bootstrap" : true,
"log_file" : "/var/consul-external/log/consul_external.log",
"retry_join" : ["192.168.200.5"],
"retry_interval" : "10s"
}

View File

@@ -0,0 +1,14 @@
# Systemd unit file for default tomcat
#
[Unit]
Description=Consul-cluster
After=network.target
[Service]
EnvironmentFile=/opt/consul-cluster/etc/systemd/consul.conf
ExecStartPre=/opt/consul-cluster/script/consul_bind_ip_generate.sh
ExecStart=/opt/consul-cluster/bin/consul agent -config-dir /opt/consul-cluster/etc/ -config-file /opt/consul-cluster/etc/consul/config-server.json -bind ${CONSUL_BIND_ADDRESS} -client 0.0.0.0
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,8 @@
#!/bin/bash
rm -rf /usr/lib/systemd/system/consul*service
rm -rf /usr/lib/systemd/system/consul*service
rm -rf /var/consul*
cp -f /opt/consul-cluster/etc/systemd/consul-server.service /usr/lib/systemd/system/consul-server-cluster.service
cp -f /opt/consul-cluster/bin/consul /usr/bin/
ln -sf /opt/consul-cluster/script/consul_path_setup.sh /etc/profile.d/

View File

@@ -0,0 +1,6 @@
#!/bin/bash
#BIND_ADDRESS=$(/usr/sbin/ip route | /usr/bin/grep default | head -n 1 | /usr/bin/awk '{print $5}' | /usr/bin/xargs ifconfig | /usr/bin/grep "inet" | /usr/bin/grep -v "inet6" | /usr/bin/awk '{print $2}')
BIND_ADDRESS=$(ifconfig ens34 | grep inet | head -1 |awk '{print $2}')
#BIND_ADDRESS=192.168.200.5
systemctl set-environment CONSUL_BIND_ADDRESS=${BIND_ADDRESS}

View File

@@ -0,0 +1,3 @@
PATH=/opt/consul-external/bin:${PATH}
export PATH

View File

@@ -0,0 +1,27 @@
- name: "copy consul-cluster to destination server"
copy:
src: "{{ role_path }}/files/"
dest: /opt
mode: 0755
- name: "Template consul_bind_ip_generate.sh"
template:
src: "{{ role_path }}/templates/consul_bind_ip_generate.sh.j2"
dest: /opt/consul-cluster/script/consul_bind_ip_generate.sh
tags: template
- name: "Template config-server.json"
template:
src: "{{ role_path }}/templates/config-server.json.j2"
dest: /opt/consul-cluster/etc/consul/config-server.json
tags: template
- name: "Install consul-cluster"
shell: cd /opt/consul-cluster;sh install.sh
- name: "Start consul-cluster"
systemd:
name: consul-server-cluster
state: restarted
enabled: yes
daemon_reload: yes

View File

@@ -0,0 +1,13 @@
{
"server" : true,
"datacenter" : "{{ consul.datacenter }}",
"data_dir" : "/var/consul-cluster",
"encrypt" : "{{ consul.dckey }}",
"disable_update_check" : true,
"bootstrap_expect": 2,
"log_level" : "info",
"log_file" : "/var/consul-cluster/log/consul_cluster.log",
"retry_join" : ["{{ consul.cluster_ip }}","{{ consul.cluster_ip_backup }}"],
"retry_interval" : "10s"
}
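With bootstrap_expect set to 2 and both cluster addresses in retry_join, the two cluster_server nodes form a raft quorum between themselves. A quick verification sketch, run on either node once the consul-server-cluster service is up:

# Both servers should be listed as alive, and one raft peer marked leader.
consul members
consul operator raft list-peers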

View File

@@ -0,0 +1,4 @@
#!/bin/bash
BIND_ADDRESS=$(ifconfig {{ consul.cluster_ethname }} | grep inet | head -1 |awk '{print $2}')
systemctl set-environment CONSUL_BIND_ADDRESS=${BIND_ADDRESS}

View File

@@ -6,7 +6,8 @@
"disable_update_check" : true,
"bootstrap" : true,
"log_file" : "/var/consul-cluster/log/consul_cluster.log",
"retry_join" : ["{{ consul.cluster_ip }}"],
"log_level" : "info",
"retry_join" : ["{{ consul.cluster_ip }}","{{ consul.cluster_ip_backup }}"],
"retry_interval" : "10s"
}

View File

@@ -8,7 +8,7 @@ After=network.target
[Service]
EnvironmentFile=/opt/consul-external/etc/systemd/consul.conf
ExecStartPre=/opt/consul-external/script/consul_bind_ip_generate.sh
ExecStart=/opt/consul-external/bin/consul agent -config-dir /opt/consul-external/etc/ -config-file /opt/consul-external/etc/consul/config-server.json -bind ${CONSUL_BIND_ADDRESS} -client 0.0.0.0
ExecStart=/opt/consul-external/bin/consul agent -config-dir /opt/consul-external/etc/ -config-file /opt/consul-external/etc/consul/config-client.json -bind ${CONSUL_BIND_ADDRESS} -client 0.0.0.0
[Install]
WantedBy=multi-user.target

View File

@@ -12,8 +12,10 @@
- name: "Template config-server.json"
template:
src: "{{ role_path }}/templates/config-server.json.j2"
dest: /opt/consul-external/etc/consul/config-server.json
#src: "{{ role_path }}/templates/config-server.json.j2"
#dest: /opt/consul-external/etc/consul/config-server.json
src: "{{ role_path }}/templates/config-client.json.j2"
dest: /opt/consul-external/etc/consul/config-client.json
tags: template
- name: "Install consul-external"

View File

@@ -0,0 +1,9 @@
{
"server" : false,
"datacenter" : "{{ consul.datacenter }}",
"data_dir" : "/var/consul-external",
"encrypt" : "{{ consul.dckey }}",
"disable_update_check" : true,
"retry_join" : ["{{ consul.cluster_ip }}","{{ consul.cluster_ip_backup }}"],
"retry_interval" : "10s"
}

Binary file not shown.

View File

@@ -0,0 +1,22 @@
#!/bin/bash
# systemctl status exit code: running: 0, stopped: 3
# use the exit code of systemctl, not its text output
systemctl status oam_cluster.service &> /dev/null
STATE=$?
if [ $STATE -ne 0 ]
then
    systemctl restart oam_cluster
    sleep 10
    systemctl status oam_cluster.service &> /dev/null
    STATE=$?
    if [ $STATE -ne 0 ]
    then
        killall keepalived
        exit 1
    else
        exit 0
    fi
else
    exit 0
fi
# TODO: also check consul_cluster and influxdb

View File

@@ -0,0 +1,50 @@
! Configuration File for keepalived
global_defs {
router_id LVSTEST2
}
# monitor services such as httpd and mysql
vrrp_script chk_http_service {
script "/etc/keepalived/chk_http_service.sh"
# check every 3 seconds
interval 3
# lower the priority by 10 on each failed check
weight -10
fail 3
# number of consecutive failures before this node is considered faulty and a failover is triggered
rise 1
# a single successful check marks this node's resources as recovered
}
vrrp_instance VI_1 {
state BACKUP
interface eth0
virtual_router_id 51
priority 10
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
virtual_ipaddress {
192.168.44.244
}
# script tracked to trigger failover
track_script {
chk_http_service # the check script configured above
}
}
virtual_server 192.168.44.244 80 {
delay_loop 6
lb_algo rr
lb_kind NAT
persistence_timeout 50
protocol TCP
#real_server 127.0.0.1 80 {
# weight 1
#}
}

View File

@@ -0,0 +1,46 @@
! Configuration File for keepalived
global_defs {
router_id LVSTEST1
}
# monitor services such as httpd and mysql
vrrp_script chk_http_service {
script "/etc/keepalived/chk_http_service.sh"
# check every 3 seconds
interval 3
# lower the priority by 10 on each failed check
weight -10
fail 3
# number of consecutive failures before this node is considered faulty and a failover is triggered
rise 1
# a single successful check marks this node's resources as recovered
}
vrrp_instance VI_1 {
state MASTER
interface eth0
virtual_router_id 51
priority 100
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
virtual_ipaddress {
192.168.44.244
}
# script tracked to trigger failover
track_script {
chk_http_service # the check script configured above
}
}
virtual_server 192.168.44.244 80 {
delay_loop 6
lb_algo rr
lb_kind NAT
persistence_timeout 50
protocol TCP
}

Binary file not shown.

View File

@@ -0,0 +1,16 @@
[Unit]
Description=LVS and VRRP High Availability Monitor
After=syslog.target network-online.target
[Service]
Type=forking
PIDFile=/var/run/keepalived.pid
KillMode=process
EnvironmentFile=-/etc/sysconfig/keepalived
ExecStart=/usr/sbin/keepalived $KEEPALIVED_OPTIONS
ExecReload=/bin/kill -HUP $MAINPID
RestartSec=30s
Restart=always
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,43 @@
- name: "copy keepalived rpm to destination server"
copy:
src: "{{ role_path }}/files/"
dest: /tmp
mode: 0755
- name: "install keepalived"
yum:
name:
- /tmp/keepalived-1.3.5-16.el7.x86_64.rpm
state: present
- name: "install ipvsadm"
yum:
name:
- /tmp/ipvsadm-1.27-8.el7.x86_64.rpm
state: present
- name: "Template check_service_health.sh.j2"
template:
src: "{{ role_path }}/templates/check_service_health.sh.j2"
dest: /etc/keepalived/check_service_health.sh
tags: template
- name: "Template keepalived_master.conf.j2"
template:
src: "{{ role_path }}/templates/keepalived_master.conf.j2"
dest: /etc/keepalived/keepalived.conf
tags: template
- name: Template the keepalived.service.j2
template:
src: "{{ role_path }}/files/keepalived.service.j2"
dest: "/usr/lib/systemd/system/keepalived.service"
tags: template
- name: "Start keepalived"
systemd:
name: keepalived.service
enabled: yes
state: restarted

View File

@@ -0,0 +1,84 @@
#!/bin/bash
# systemctl status exit code: running: 0, stopped: 3
CONSUL_STATE=0
OAM_CLUSTER_STATE=0
INFLUXDB_STATE=0
check_consul_state(){
    # capture the exit code of systemctl, not its text output
    systemctl status consul-server-cluster.service &> /dev/null
    CONSUL_STATE=$?
    if [ $CONSUL_STATE -ne 0 ]
    then
        return 1
    else
        return 0
    fi
}
check_oam_cluster_state(){
    systemctl status oam_cluster.service &> /dev/null
    OAM_CLUSTER_STATE=$?
    if [ $OAM_CLUSTER_STATE -ne 0 ]
    then
        return 1
    else
        return 0
    fi
}
check_influxdb_state(){
    systemctl status influxd.service &> /dev/null
    INFLUXDB_STATE=$?
    if [ $INFLUXDB_STATE -ne 0 ]
    then
        return 1
    else
        return 0
    fi
}
# return value: 0: success; 1: error
check_service_health(){
    check_consul_state
    if [ $? -ne 0 ]
    then
        echo "consul service fail!"
        return 1
    fi
    check_oam_cluster_state
    if [ $? -ne 0 ]
    then
        echo "oam_cluster service fail!"
        return 1
    fi
    check_influxdb_state
    if [ $? -ne 0 ]
    then
        echo "influxd service fail!"
        return 1
    fi
    return 0
}
check_service_health
if [ $? -ne 0 ]
then
    # restart the monitored services once before re-checking
    systemctl restart consul-server-cluster.service oam_cluster.service influxd.service
    sleep 10
fi
# after an error has been detected and the services restarted, check again
check_service_health
if [ $? -ne 0 ]
then
    echo "check service health fail!"
    exit 1
else
    echo "check service health succ!"
    exit 0
fi
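A hedged way to exercise this health check by hand before keepalived drives it, assuming the ha_master role has deployed it to /etc/keepalived/check_service_health.sh:

# Exit code 0 means healthy; a non-zero exit is what keepalived counts as a failure.
bash /etc/keepalived/check_service_health.sh
echo $?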

View File

@@ -0,0 +1,50 @@
! Configuration File for keepalived
global_defs {
router_id LVSTEST2
}
# monitor services such as httpd and mysql
vrrp_script chk_http_service {
script "/etc/keepalived/chk_http_service.sh"
# check every 3 seconds
interval 3
# lower the priority by 10 on each failed check
weight -10
fail 3
# number of consecutive failures before this node is considered faulty and a failover is triggered
rise 1
# a single successful check marks this node's resources as recovered
}
vrrp_instance VI_1 {
state BACKUP
interface eth0
virtual_router_id 51
priority 10
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
virtual_ipaddress {
192.168.44.244
}
# script tracked to trigger failover
track_script {
chk_http_service # the check script configured above
}
}
virtual_server 192.168.44.244 80 {
delay_loop 6
lb_algo rr
lb_kind NAT
persistence_timeout 50
protocol TCP
#real_server 127.0.0.1 80 {
# weight 1
#}
}

View File

@@ -0,0 +1,38 @@
! Configuration File for keepalived
global_defs {
router_id OAMHA
}
# monitor services such as httpd and mysql
vrrp_script check_service_health {
script "/etc/keepalived/check_service_health.sh"
# check every 3 seconds
interval 3
# lower the priority by 10 on each failed check
weight -10
fail 3
# number of consecutive failures before this node is considered faulty and a failover is triggered
rise 1
# a single successful check marks this node's resources as recovered
}
vrrp_instance VI_1 {
state {{ ha_master.instance_state }}
interface {{ ha_master.ethname }}
virtual_router_id 51
priority 100
advert_int 1
authentication {
auth_type PASS
auth_pass oamha
}
virtual_ipaddress {
{{ ha_master.virtual_ip }}
}
# script tracked to trigger failover
track_script {
check_service_health # the check script configured above
}
}
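Once keepalived is running on both cluster_server nodes, the failover driven by this template can be observed from a shell; a sketch assuming the group_vars above (interface eth0, virtual IP 192.168.44.58):

# The MASTER node should hold the virtual IP.
ip addr show eth0 | grep 192.168.44.58
# Simulate a failure on the master and watch the VIP move to the backup.
systemctl stop influxd
journalctl -u keepalived -f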

View File

@@ -5,8 +5,8 @@
- name: "copy Anaconda to destination server"
copy:
src: "{{ role_path }}/files/"
dest: /tmp
src: "{{ role_path }}/files/Anaconda3-2019.10-Linux-x86_64.sh"
dest: /tmp/Anaconda3-2019.10-Linux-x86_64.sh
when: return.rc != 0
- name: "install ipython"

View File

@@ -0,0 +1,12 @@
[Unit]
Description=oam_cli restart
[Service]
Type=oneshot
ExecStart=/usr/bin/mount --bind /opt/tsg/etc /home/tsgroot/opt/tsg/etc
ExecStart=/usr/bin/mount -o bind /proc /home/tsgroot/proc
ExecStart=/usr/bin/mount -o remount,rw,bind /proc /home/tsgroot/proc
[Install]
WantedBy=multi-user.target

View File

@@ -14,3 +14,14 @@
src: "{{role_path}}/templates/application.properties.j2"
dest: /home/tsgroot/opt/tsg/cli/application.properties
tags: template
- name: "copy oam_cli.service to destination server"
copy:
src: "{{ role_path }}/files/oam_cli.service"
dest: /usr/lib/systemd/system/oam_cli.service
- name: "restart oam_cli service"
systemd:
daemon_reload: yes
name: oam_cli
state: restarted

View File

@@ -9,7 +9,7 @@ tags.config=/opt/tsg/etc/tsg_tags.json
cli.agent.port=50222
# oam.influx.url=http://192.168.161.134:8086
oam.influx.url=http://{{ mcn0_ip }}:58086
oam.influx.url=http://{{ oam.mcn0_ip }}:58086
oam.influx.db={{ influxdb.dbname }}
oam.influx.user={{ influxdb.username }}
oam.influx.password={{ influxdb.passwd }}

BIN
roles/oam_core/files/mysql Normal file

Binary file not shown.

View File

@@ -24,8 +24,8 @@ ipmi.fru.mcn3=ipmitool -t 0x88 fru
network.data=/opt/tsg/oam_core/shell/network.sh
# local influxdb and mariadb addresses
oam.local.server.mariadb.ip={{ mcn0_ip }}
oam.local.server.influxdb.ip={{ mcn0_ip }}
oam.local.server.mariadb.ip={{ oam.mcn0_ip }}
oam.local.server.influxdb.ip={{ oam.mcn0_ip }}
# cluster influxdb and mariadb addresses
oam.cluster.server.mariadb.ip={{ mariadb.cluster_ip }}
oam.cluster.server.influxdb.ip={{ influxdb.cluster_ip }}

View File

@@ -25,4 +25,11 @@
enabled: yes
daemon_reload: yes
- name: "Start consul"
systemd:
name: consul-server-external
state: restarted
enabled: yes
daemon_reload: yes

View File

@@ -1,4 +1,4 @@
oam.influx.url=http://{{ mcn0_ip }}:58086
oam.influx.url=http://{{ oam.mcn0_ip }}:58086
oam.influx.db={{ influxdb.dbname }}
oam.influx.user={{ influxdb.username }}
oam.influx.password={{ influxdb.passwd }}

View File

@@ -22,6 +22,13 @@
username = "{{ influxdb.username }}"
password = "{{ influxdb.passwd }}"
[[outputs.influxdb]]
urls = ["http://{{ influxdb.cluster_ip_backup }}:58086"]
database = "{{ influxdb.dbname }}"
# HTTP Basic Auth
username = "{{ influxdb.username }}"
password = "{{ influxdb.passwd }}"
[[outputs.influxdb]]
urls = ["http://192.168.100.1:58086"]
database = "{{ influxdb.dbname }}"

View File

@@ -1,2 +1,2 @@
[MAAT]
ACCEPT_TAGS={"tags":[{"tag":"Location","value":"{{tag_location}}"}]}
ACCEPT_TAGS={"tags":[{"tag":"data_center","value":"{{tag_location}}"}]}

View File

@@ -1 +1 @@
{"sn": "GN191205CH1234567890"}
{"sn": "GN201020CH1234567890"}

View File

@@ -1 +1 @@
{"tags":[{"tag":"Location","value":"{{tag_location}}"}]}
{"tags":[{"tag":"data_center","value":"{{tag_location}}"}]}

69
uninstall.yml Normal file
View File

@@ -0,0 +1,69 @@
---
- hosts: blade-mxn
tasks:
- name: "stop oam_core.service"
service:
name: oam_core
state: stopped
ignore_errors: yes
- name: "stop oam_snmp service"
service:
name: oam_snmp
state: stopped
ignore_errors: yes
- name: "stop oam_cli service"
service:
name: oam_cli
state: stopped
ignore_errors: yes
- name: "stop oam_cli_agent_mxn service"
service:
name: oam_cli_agent_mxn
state: stopped
ignore_errors: yes
- name: "stop consul-server-external service"
service:
name: consul-server-external
state: stopped
ignore_errors: yes
- name: "uninstall oam_core"
yum:
name: "oam_core"
state: absent
- name: "uninstall oam_cli"
yum:
name: "oam_cli"
state: absent
- name: "uninstall oam_cli_agent_mxn"
yum:
name: "oam_cli_agent_mxn"
state: absent
- name: "uninstall tsg-cli"
yum:
name: "tsg-cli"
state: absent
- hosts: all
tasks:
- name: "stop tsg-monitor service"
service:
name: tsg-monitor
state: stopped
ignore_errors: yes
- name: "uninstall tsg-cli"
yum:
name: "tsg-cli"
state: absent