This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files
tango-tsgx-hardware/NEZHA/master/AlertRule.json
2024-06-27 15:47:09 +08:00

302 lines
11 KiB
JSON

[
{
"Name":"TSG-OS container restart",
"Type":"1",
"Evaluation interval ":"30",
"Timeout(s)":"300",
"Expression/OID":"increase(kube_pod_container_status_restarts_total{container=~\"firewall|packet-io-engine|^telegraf.*|certstore|proxy|sce|bfdd|shaping\"}[5m])",
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\">\",\"value\":\"0\"}]",
"Extension Label":"",
"Unit":"short",
"Duration":"1",
"Summary":"The container {{ $labels.container }} restarted.",
"Description":"",
"Receiver":"",
"Notices":"",
"State":"1",
"Automatic expired":"1",
"Schedule":"0",
"Effective times":"1,2,3,4,5,6,7",
"Start time":"00:00",
"End time":"23:59",
"Active notification":"0",
"Expired notification":"0",
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
"Data link":"{\"dataLink\":[]}"
},
{
"Name":"TSG-OS traffic steering too high",
"Type":"1",
"Evaluation interval ":"60",
"Timeout(s)":"300",
"Expression/OID":"irate(phy_rx_bits_total{service=\"packet-io-engine\",dev!~\"virtio_dign_(.*)\"}[5m])",
"Condition":"[{\"id\":2,\"weight\":2,\"operator\":\">=\",\"value\":\"160000000000\"}]",
"Extension Label":"",
"Unit":"bits/sec(SI)",
"Duration":"60",
"Summary":"Physical traffic steering too high.",
"Description":"",
"Receiver":"",
"Notices":"",
"State":"1",
"Automatic expired":"1",
"Schedule":"0",
"Effective times":"1,2,3,4,5,6,7",
"Start time":"00:00",
"End time":"23:59",
"Active notification":"0",
"Expired notification":"0",
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
"Data link":"{\"dataLink\":[]}"
},
{
"Name":"TSG-OS packet io nic rx drops (missed)",
"Type":"1",
"Evaluation interval ":"60",
"Timeout(s)":"300",
"Expression/OID":"irate(phy_rx_missed_total{service=\"packet-io-engine\",dev!~\"virtio_dign_(.*)\"}[5m])",
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\">\",\"value\":\"1000\"},{\"id\":2,\"weight\":2,\"operator\":\">\",\"value\":\"100\"},{\"id\":3,\"weight\":3,\"operator\":\">\",\"value\":\"10\"}]",
"Extension Label":"",
"Unit":"packets/sec(IEC)",
"Duration":"120",
"Summary":"The nic rx drops due to missed by packet io.",
"Description":"",
"Receiver":"",
"Notices":"",
"State":"1",
"Automatic expired":"1",
"Schedule":"0",
"Effective times":"1,2,3,4,5,6,7",
"Start time":"00:00",
"End time":"23:59",
"Active notification":"0",
"Expired notification":"0",
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
"Data link":"{\"dataLink\":[]}"
},
{
"Name":"TSG-OS packet io nic rx error",
"Type":"1",
"Evaluation interval ":"60",
"Timeout(s)":"300",
"Expression/OID":"irate(phy_rx_error_total{service=\"packet-io-engine\",dev!~\"virtio_dign_(.*)\"}[5m])",
"Condition":"[{\"id\":2,\"weight\":2,\"operator\":\">\",\"value\":\"0.0\"}]",
"Extension Label":"",
"Unit":"packets/sec(IEC)",
"Duration":"60",
"Summary":"The nic rx error by packet io.",
"Description":"",
"Receiver":"",
"Notices":"",
"State":"1",
"Automatic expired":"1",
"Schedule":"0",
"Effective times":"1,2,3,4,5,6,7",
"Start time":"00:00",
"End time":"23:59",
"Active notification":"0",
"Expired notification":"0",
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
"Data link":"{\"dataLink\":[]}"
},
{
"Name":"TSG-OS physical component too hot",
"Type":"1",
"Evaluation interval ":"30",
"Timeout(s)":"300",
"Expression/OID":"node_hwmon_temp_celsius{service=\"prometheus-node-exporter\"}",
"Condition":"[{\"id\":3,\"weight\":3,\"operator\":\">\",\"value\":\"90.0\"}]",
"Extension Label":"",
"Unit":"short",
"Duration":"60",
"Summary":"The physical component is overheating.",
"Description":"Physical hardware component too hot\nVALUE = {{ $value }}\nLABELS: {{ $labels }}",
"Receiver":"",
"Notices":"",
"State":"1",
"Automatic expired":"1",
"Schedule":"0",
"Effective times":"1,2,3,4,5,6,7",
"Start time":"00:00",
"End time":"23:59",
"Active notification":"0",
"Expired notification":"0",
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
"Data link":"{\"dataLink\":[]}"
},
{
"Name":"TSG-OS node memory pressure",
"Type":"1",
"Evaluation interval ":"60",
"Timeout(s)":"300",
"Expression/OID":"kube_node_status_condition{condition=\"MemoryPressure\",status=\"true\"}",
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\"==\",\"value\":\"1.0\"}]",
"Extension Label":"",
"Unit":"short",
"Duration":"60",
"Summary":"Node memory pressure.",
"Description":"{{ $labels.node }} has MemoryPressure condition\n VALUE = {{ $value }}\n LABELS = {{ $labels }}",
"Receiver":"",
"Notices":"",
"State":"1",
"Automatic expired":"1",
"Schedule":"0",
"Effective times":"1,2,3,4,5,6,7",
"Start time":"00:00",
"End time":"23:59",
"Active notification":"0",
"Expired notification":"0",
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
"Data link":"{\"dataLink\":[]}"
},
{
"Name":"TSG-OS node disk pressure",
"Type":"1",
"Evaluation interval ":"60",
"Timeout(s)":"300",
"Expression/OID":"kube_node_status_condition{condition=\"DiskPressure\",status=\"true\"}",
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\"==\",\"value\":\"1.0\"}]",
"Extension Label":"",
"Unit":"short",
"Duration":"60",
"Summary":"Node disk pressure.",
"Description":"{{ $labels.node }} has DiskPressure condition\n VALUE = {{ $value }}\n LABELS = {{ $labels }}",
"Receiver":"",
"Notices":"",
"State":"1",
"Automatic expired":"1",
"Schedule":"0",
"Effective times":"1,2,3,4,5,6,7",
"Start time":"00:00",
"End time":"23:59",
"Active notification":"0",
"Expired notification":"0",
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
"Data link":"{\"dataLink\":[]}"
},
{
"Name":"TSG-OS host high cpu load",
"Type":"1",
"Evaluation interval ":"30",
"Timeout(s)":"300",
"Expression/OID":"100 - (avg by(instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100) > 80",
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\">\",\"value\":\"80.0\"}]",
"Extension Label":"",
"Unit":"percent(0-100)",
"Duration":"60",
"Summary":"The host is experiencing high CPU load.",
"Description":"CPU load is > 80%\nVALUE = {{ $value }}\nLABELS = {{ $labels }}",
"Receiver":"",
"Notices":"",
"State":"1",
"Automatic expired":"1",
"Schedule":"0",
"Effective times":"1,2,3,4,5,6,7",
"Start time":"00:00",
"End time":"23:59",
"Active notification":"0",
"Expired notification":"0",
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
"Data link":"{\"dataLink\":[]}"
},
{
"Name":"TSG-OS traffic engines rx drops",
"Type":"1",
"Evaluation interval ":"60",
"Timeout(s)":"300",
"Expression/OID":"irate(rx_drops_total{service=\"packet-io-engine\"}[5m])",
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\">\",\"value\":\"2000\"},{\"id\":2,\"weight\":2,\"operator\":\">\",\"value\":\"1000\"},{\"id\":3,\"weight\":3,\"operator\":\">\",\"value\":\"200\"}]",
"Extension Label":"",
"Unit":"packets/sec(IEC)",
"Duration":"180",
"Summary":"The packet rx drops by traffic engines.",
"Description":"",
"Receiver":"",
"Notices":"",
"State":"1",
"Automatic expired":"1",
"Schedule":"0",
"Effective times":"1,2,3,4,5,6,7",
"Start time":"00:00",
"End time":"23:59",
"Active notification":"0",
"Expired notification":"0",
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
"Data link":"{\"dataLink\":[]}"
},
{
"Name":"TSG-OS packet io nic tx error",
"Type":"1",
"Evaluation interval ":"60",
"Timeout(s)":"300",
"Expression/OID":"irate(phy_tx_error_total{service=\"packet-io-engine\", dev!~\"virtio_dign_(.*)\"}[5m])",
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\">\",\"value\":\"500\"},{\"id\":2,\"weight\":2,\"operator\":\">\",\"value\":\"50\"},{\"id\":3,\"weight\":3,\"operator\":\">\",\"value\":\"0\"}]",
"Extension Label":"",
"Unit":"packets/sec(IEC)",
"Duration":"60",
"Summary":"The nic tx error by packet io.",
"Description":"",
"Receiver":"",
"Notices":"",
"State":"1",
"Automatic expired":"1",
"Schedule":"0",
"Effective times":"1,2,3,4,5,6,7",
"Start time":"00:00",
"End time":"23:59",
"Active notification":"0",
"Expired notification":"0",
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
"Data link":"{\"dataLink\":[]}"
},
{
"Name":"TSG-OS traffic engines tx drops",
"Type":"1",
"Evaluation interval ":"30",
"Timeout(s)":"300",
"Expression/OID":"irate(tx_drops_total{service=\"packet-io-engine\"}[5m]) + irate(ftx_missed_total{service=\"packet-io-engine\"}[5m])",
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\">\",\"value\":\"3000\"},{\"id\":2,\"weight\":2,\"operator\":\">\",\"value\":\"1000\"},{\"id\":3,\"weight\":3,\"operator\":\">\",\"value\":\"50\"}]",
"Extension Label":"",
"Unit":"short",
"Duration":"180",
"Summary":"The packet tx drops by traffic engines.",
"Description":"",
"Receiver":"",
"Notices":"",
"State":"1",
"Automatic expired":"1",
"Schedule":"0",
"Effective times":"1,2,3,4,5,6,7",
"Start time":"00:00",
"End time":"23:59",
"Active notification":"0",
"Expired notification":"0",
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
"Data link":"{\"dataLink\":[]}"
},
{
"Name":"TSG-OS packet io nic rx drops (nobuf)",
"Type":"1",
"Evaluation interval ":"60",
"Timeout(s)":"300",
"Expression/OID":"irate(phy_rx_nobuf_total{service=\"packet-io-engine\",dev!~\"virtio_dign_(.*)\"}[5m])",
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\">\",\"value\":\"1000\"},{\"id\":2,\"weight\":2,\"operator\":\">\",\"value\":\"100\"},{\"id\":3,\"weight\":3,\"operator\":\">\",\"value\":\"10\"}]",
"Extension Label":"",
"Unit":"packets/sec(IEC)",
"Duration":"120",
"Summary":"The nic rx drops due to nobuf by packet io.",
"Description":"",
"Receiver":"",
"Notices":"",
"State":"1",
"Automatic expired":"1",
"Schedule":"0",
"Effective times":"1,2,3,4,5,6,7",
"Start time":"00:00",
"End time":"23:59",
"Active notification":"0",
"Expired notification":"0",
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
"Data link":"{\"dataLink\":[]}"
}
]