TSG-6776: TSG-OS-9140 增加 IPMI watchdog,当计算板不可用时通过硬件重启计算板

This commit is contained in:
luwenpeng
2021-06-25 10:07:06 +08:00
committed by fumingwei
parent f22ad8ccfa
commit b096930a87
6 changed files with 171 additions and 3 deletions

View File

@@ -52,3 +52,15 @@
systemd:
name: tsg-env
enabled: yes
# Disable IPMI
# Only the unit's enablement is turned off; no `state` is set in this task.
- name: "disable ipmi"
systemd:
name: ipmi
enabled: no
# Disable Watchdog
# Only the unit's enablement is turned off; no `state` is set in this task.
- name: "disable watchdog"
systemd:
name: watchdog
enabled: no

View File

@@ -52,3 +52,15 @@
systemd:
name: tsg-env
enabled: yes
# Disable IPMI
# Only the unit's enablement is turned off; no `state` is set in this task.
- name: "disable ipmi"
systemd:
name: ipmi
enabled: no
# Disable Watchdog
# Only the unit's enablement is turned off; no `state` is set in this task.
- name: "disable watchdog"
systemd:
name: watchdog
enabled: no

View File

@@ -190,3 +190,27 @@
src: "{{ role_path }}/templates/override_system_slice_mem.conf.j2"
dest: /etc/systemd/system/system.slice.d/50-MemoryLimit.conf
mode: 0644
# Enable IPMI
# Install the IPMI sysconfig file. The `copy` module is used, so the .j2 source
# is placed as-is, without Jinja2 rendering.
- name: "copy ipmi.conf to /etc/sysconfig/ipmi"
copy:
src: "{{ role_path }}/templates/ipmi.conf.j2"
dest: /etc/sysconfig/ipmi
mode: 0644
# Mark the ipmi unit as enabled; no `state` is set in this task.
- name: "enable ipmi"
systemd:
name: ipmi
enabled: yes
# Enable Watchdog
# Install the watchdog daemon configuration. As above, `copy` places the .j2
# source as-is, without Jinja2 rendering.
- name: "copy watchdog.conf to /etc/watchdog.conf"
copy:
src: "{{ role_path }}/templates/watchdog.conf.j2"
dest: /etc/watchdog.conf
mode: 0644
# Mark the watchdog unit as enabled; no `state` is set in this task.
- name: "enable watchdog"
systemd:
name: watchdog
enabled: yes

View File

@@ -0,0 +1,69 @@
## Path: Hardware/IPMI
## Description: Enable standard hardware interfaces (KCS, BT, SMIC)
## Type: yesno
## Default: "yes"
## Config: ipmi
# Enable standard hardware interfaces (KCS, BT, SMIC)
# You probably want this enabled.
# IPMI_SI disabled by OpenIPMI-modalias rpm scriptlet
IPMI_SI=no
## Path: Hardware/IPMI
## Description: Enable /dev/ipmi0 interface, used by ipmitool, ipmicmd, and other userspace IPMI-using applications
## Type: yesno
## Default: "yes"
## Config: ipmi
# Enable /dev/ipmi0 interface, used by ipmitool, ipmicmd,
# and other userspace IPMI-using applications.
# You probably want this enabled.
DEV_IPMI=yes
## Path: Hardware/IPMI
## Description: Enable IPMI_WATCHDOG if you want the IPMI watchdog
## Type: yesno
## Default: "no"
## Config: ipmi
# Enable IPMI_WATCHDOG if you want the IPMI watchdog
# to reboot the system if it hangs
IPMI_WATCHDOG=yes
## Path: Hardware/IPMI
## Description: Watchdog options - modinfo ipmi_watchdog for details
## Type: string
## Default: "timeout=60"
## Config: ipmi
# Watchdog options - modinfo ipmi_watchdog for details
# watchdog timeout value in seconds
# as there is no userspace ping application that runs during shutdown,
# be sure to give it enough time for any device drivers to
# do their cleanup (e.g. megaraid cache flushes)
# without the watchdog triggering prematurely
IPMI_WATCHDOG_OPTIONS="timeout=600 action=reset"
## Path: Hardware/IPMI
## Description: Enable IPMI_POWEROFF if you want the IPMI poweroff module to be loaded.
## Type: yesno
## Default: "no"
## Config: ipmi
# Enable IPMI_POWEROFF if you want the IPMI
# poweroff module to be loaded.
IPMI_POWEROFF=no
## Path: Hardware/IPMI
## Description: Enable IPMI_POWERCYCLE if you want the system to be power-cycled on reboot
## Type: yesno
## Default: "no"
## Config: ipmi
# Enable IPMI_POWERCYCLE if you want the system to be power-cycled (power
# down, delay briefly, power on) rather than power off, on systems
# that support such. IPMI_POWEROFF=yes is also required.
IPMI_POWERCYCLE=no
## Path: Hardware/IPMI
## Description: Enable "legacy" interfaces for applications
## Type: yesno
## Default: "no"
## Config: ipmi
# Enable "legacy" interfaces for applications
# Intel IMB driver interface
IPMI_IMB=no

View File

@@ -0,0 +1,51 @@
#ping = 172.31.14.1
#ping = 172.26.1.255
#interface = eth0
#file = /var/log/messages
#change = 1407
# Uncomment to enable test. Setting one of these values to '0' disables it.
# These values will hopefully never reboot your machine during normal use
# (if your machine is really hung, the loadavg will go much higher than 25)
#max-load-1 = 24
#max-load-5 = 18
#max-load-15 = 12
# Note that this is the number of pages!
# To get the real size, check how large the pagesize is on your machine.
#min-memory = 1
# With enforcing SELinux policy please use the /usr/libexec/watchdog/scripts/
# or /etc/watchdog.d/ for your test-binary and repair-binary configuration.
#repair-binary = /usr/sbin/repair
#repair-timeout =
#test-binary =
#test-timeout =
watchdog-device = /dev/watchdog
# Defaults compiled into the binary
#temperature-device =
#max-temperature = 120
# Defaults compiled into the binary
#admin = root
interval = 20
#logtick = 1
#log-dir = /var/log/watchdog
# This greatly decreases the chance that watchdog won't be scheduled before
# your machine is really loaded
realtime = yes
priority = 1
# When using custom service pid check with custom service
# systemd unit file please be aware the "Requires="
# does dependent service deactivation.
# Using "Before=watchdog.service" or "Before=watchdog-ping.service"
# in the custom service unit file may be the desired operation instead.
# See man 5 systemd.unit for more details.
#
# Check if rsyslogd is still running by enabling the following line
#pidfile = /var/run/rsyslogd.pid

View File

@@ -15,7 +15,7 @@ set -ex
package_to_install="@base @core @debugging @directory-client @guest-agents
@hardware-monitoring @network-file-system-client @performance @remote-system-management
grub2 epel-release efibootmgr ansible yum-utils ipmitool docker-ce docker-ce-cli containerd.io lrzsz python3 vconfig"
grub2 epel-release efibootmgr ansible yum-utils ipmitool docker-ce docker-ce-cli containerd.io lrzsz python3 vconfig watchdog"
locak_package_to_install="$projectdir/package/kernel-lt-$kernel_version.rpm
$projectdir/package/kernel-lt-devel-$kernel_version.rpm"