TSG-6776: TSG-OS-9140 增加 IPMI watchdog,当计算板不可用时通过硬件重启计算板
This commit is contained in:
@@ -52,3 +52,15 @@
|
||||
systemd:
|
||||
name: tsg-env
|
||||
enabled: yes
|
||||
|
||||
# 禁用 IPMI
|
||||
- name: "disable ipmi"
|
||||
systemd:
|
||||
name: ipmi
|
||||
enabled: no
|
||||
|
||||
# 禁用 Watchdog
|
||||
- name: "disable watchdog"
|
||||
systemd:
|
||||
name: watchdog
|
||||
enabled: no
|
||||
@@ -52,3 +52,15 @@
|
||||
systemd:
|
||||
name: tsg-env
|
||||
enabled: yes
|
||||
|
||||
# 禁用 IPMI
|
||||
- name: "disable ipmi"
|
||||
systemd:
|
||||
name: ipmi
|
||||
enabled: no
|
||||
|
||||
# 禁用 Watchdog
|
||||
- name: "disable watchdog"
|
||||
systemd:
|
||||
name: watchdog
|
||||
enabled: no
|
||||
@@ -190,3 +190,27 @@
|
||||
src: "{{ role_path }}/templates/override_system_slice_mem.conf.j2"
|
||||
dest: /etc/systemd/system/system.slice.d/50-MemoryLimit.conf
|
||||
mode: 0644
|
||||
|
||||
# 启用 IPMI
|
||||
- name: "copy ipmi.conf to /etc/sysconfig/ipmi"
|
||||
copy:
|
||||
src: "{{ role_path }}/templates/ipmi.conf.j2"
|
||||
dest: /etc/sysconfig/ipmi
|
||||
mode: 0644
|
||||
|
||||
- name: "enable ipmi"
|
||||
systemd:
|
||||
name: ipmi
|
||||
enabled: yes
|
||||
|
||||
# 启用 Watchdog
|
||||
- name: "copy watchdog.conf to /etc/watchdog.conf"
|
||||
copy:
|
||||
src: "{{ role_path }}/templates/watchdog.conf.j2"
|
||||
dest: /etc/watchdog.conf
|
||||
mode: 0644
|
||||
|
||||
- name: "enable watchdog"
|
||||
systemd:
|
||||
name: watchdog
|
||||
enabled: yes
|
||||
69
ansible/roles/system-init-9140/templates/ipmi.conf.j2
Normal file
69
ansible/roles/system-init-9140/templates/ipmi.conf.j2
Normal file
@@ -0,0 +1,69 @@
|
||||
## Path: Hardware/IPMI
|
||||
## Description: Enable standard hardware interfaces (KCS, BT, SMIC)
|
||||
## Type: yesno
|
||||
## Default: "yes"
|
||||
## Config: ipmi
|
||||
# Enable standard hardware interfaces (KCS, BT, SMIC)
|
||||
# You probably want this enabled.
|
||||
# IPMI_SI disabled by OpenIPMI-modalias rpm scriptlet
|
||||
IPMI_SI=no
|
||||
|
||||
## Path: Hardware/IPMI
|
||||
## Description: Enable /dev/ipmi0 interface, used by ipmitool, ipmicmd, and other userspace IPMI-using applications
|
||||
## Type: yesno
|
||||
## Default: "yes"
|
||||
## Config: ipmi
|
||||
# Enable /dev/ipmi0 interface, used by ipmitool, ipmicmd,
|
||||
# and other userspace IPMI-using applications.
|
||||
# You probably want this enabled.
|
||||
DEV_IPMI=yes
|
||||
|
||||
## Path: Hardware/IPMI
|
||||
## Description: Enable IPMI_WATCHDOG if you want the IPMI watchdog
|
||||
## Type: yesno
|
||||
## Default: "no"
|
||||
## Config: ipmi
|
||||
# Enable IPMI_WATCHDOG if you want the IPMI watchdog
|
||||
# to reboot the system if it hangs
|
||||
IPMI_WATCHDOG=yes
|
||||
|
||||
## Path: Hardware/IPMI
|
||||
## Description: Watchdog options - modinfo ipmi_watchdog for details
|
||||
## Type: string
|
||||
## Default: "timeout=60"
|
||||
## Config: ipmi
|
||||
# Watchdog options - modinfo ipmi_watchdog for details
|
||||
# watchdog timeout value in seconds
|
||||
# as there is no userspace ping application that runs during shutdown,
|
||||
# be sure to give it enough time for any device drivers to
|
||||
# do their cleanup (e.g. megaraid cache flushes)
|
||||
# without the watchdog triggering prematurely
|
||||
IPMI_WATCHDOG_OPTIONS="timeout=600 action=reset"
|
||||
|
||||
## Path: Hardware/IPMI
|
||||
## Description: Enable IPMI_POWEROFF if you want the IPMI poweroff module to be loaded.
|
||||
## Type: yesno
|
||||
## Default: "no"
|
||||
## Config: ipmi
|
||||
# Enable IPMI_POWEROFF if you want the IPMI
|
||||
# poweroff module to be loaded.
|
||||
IPMI_POWEROFF=no
|
||||
|
||||
## Path: Hardware/IPMI
|
||||
## Description: Enable IPMI_POWERCYCLE if you want the system to be power-cycled on reboot
|
||||
## Type: yesno
|
||||
## Default: "no"
|
||||
## Config: ipmi
|
||||
# Enable IPMI_POWERCYCLE if you want the system to be power-cycled (power
|
||||
# down, delay briefly, power on) rather than power off, on systems
|
||||
# that support such. IPMI_POWEROFF=yes is also required.
|
||||
IPMI_POWERCYCLE=no
|
||||
|
||||
## Path: Hardware/IPMI
|
||||
## Description: Enable "legacy" interfaces for applications
|
||||
## Type: yesno
|
||||
## Default: "no"
|
||||
## Config: ipmi
|
||||
# Enable "legacy" interfaces for applications
|
||||
# Intel IMB driver interface
|
||||
IPMI_IMB=no
|
||||
51
ansible/roles/system-init-9140/templates/watchdog.conf.j2
Normal file
51
ansible/roles/system-init-9140/templates/watchdog.conf.j2
Normal file
@@ -0,0 +1,51 @@
|
||||
#ping = 172.31.14.1
|
||||
#ping = 172.26.1.255
|
||||
#interface = eth0
|
||||
#file = /var/log/messages
|
||||
#change = 1407
|
||||
|
||||
# Uncomment to enable test. Setting one of these values to '0' disables it.
|
||||
# These values will hopefully never reboot your machine during normal use
|
||||
# (if your machine is really hung, the loadavg will go much higher than 25)
|
||||
#max-load-1 = 24
|
||||
#max-load-5 = 18
|
||||
#max-load-15 = 12
|
||||
|
||||
# Note that this is the number of pages!
|
||||
# To get the real size, check how large the pagesize is on your machine.
|
||||
#min-memory = 1
|
||||
|
||||
# With enforcing SELinux policy please use the /usr/libexec/watchdog/scripts/
|
||||
# or /etc/watchdog.d/ for your test-binary and repair-binary configuration.
|
||||
#repair-binary = /usr/sbin/repair
|
||||
#repair-timeout =
|
||||
#test-binary =
|
||||
#test-timeout =
|
||||
|
||||
watchdog-device = /dev/watchdog
|
||||
|
||||
# Defaults compiled into the binary
|
||||
#temperature-device =
|
||||
#max-temperature = 120
|
||||
|
||||
# Defaults compiled into the binary
|
||||
#admin = root
|
||||
interval = 20
|
||||
#logtick = 1
|
||||
#log-dir = /var/log/watchdog
|
||||
|
||||
# This greatly decreases the chance that watchdog won't be scheduled before
|
||||
# your machine is really loaded
|
||||
realtime = yes
|
||||
priority = 1
|
||||
|
||||
# When using a custom service pid check with a custom service
|
||||
# systemd unit file, please be aware that "Requires="
|
||||
# causes the dependent service to be deactivated.
|
||||
# Using "Before=watchdog.service" or "Before=watchdog-ping.service"
|
||||
# in the custom service unit file may be the desired operation instead.
|
||||
# See man 5 systemd.unit for more details.
|
||||
#
|
||||
# Check if rsyslogd is still running by enabling the following line
|
||||
#pidfile = /var/run/rsyslogd.pid
|
||||
|
||||
@@ -15,7 +15,7 @@ set -ex
|
||||
|
||||
package_to_install="@base @core @debugging @directory-client @guest-agents
|
||||
@hardware-monitoring @network-file-system-client @performance @remote-system-management
|
||||
grub2 epel-release efibootmgr ansible yum-utils ipmitool docker-ce docker-ce-cli containerd.io lrzsz python3 vconfig"
|
||||
grub2 epel-release efibootmgr ansible yum-utils ipmitool docker-ce docker-ce-cli containerd.io lrzsz python3 vconfig watchdog"
|
||||
|
||||
locak_package_to_install="$projectdir/package/kernel-lt-$kernel_version.rpm
|
||||
$projectdir/package/kernel-lt-devel-$kernel_version.rpm"
|
||||
|
||||
Reference in New Issue
Block a user