feature: add master rules and templates json.
This commit is contained in:
277
NEZHA/master/AlertRule.json
Normal file
277
NEZHA/master/AlertRule.json
Normal file
@@ -0,0 +1,277 @@
|
||||
[
|
||||
{
|
||||
"Name":"TSG-OS container restart",
|
||||
"Type":"1",
|
||||
"Evaluation interval ":"30",
|
||||
"Timeout(s)":"300",
|
||||
"Expression/OID":"increase(kube_pod_container_status_restarts_total{container=~\"firewall|packet-io-engine|^telegraf.*|certstore|proxy|sce|bfdd|shaping\"}[5m])",
|
||||
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\">\",\"value\":\"0\"}]",
|
||||
"Extension Label":"",
|
||||
"Unit":"short",
|
||||
"Duration":"1",
|
||||
"Summary":"container {{ $labels.container }} restarted",
|
||||
"Description":"",
|
||||
"Receiver":"",
|
||||
"Notices":"",
|
||||
"State":"1",
|
||||
"Automatic expired":"1",
|
||||
"Schedule":"0",
|
||||
"Effective times":"1,2,3,4,5,6,7",
|
||||
"Start time":"00:00",
|
||||
"End time":"23:59",
|
||||
"Active notification":"0",
|
||||
"Expired notification":"0",
|
||||
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
||||
"Data link":"{\"dataLink\":[]}"
|
||||
},
|
||||
{
|
||||
"Name":"TSG-OS traffic steering too high",
|
||||
"Type":"1",
|
||||
"Evaluation interval ":"60",
|
||||
"Timeout(s)":"300",
|
||||
"Expression/OID":"irate(phy_rx_bits_total{service=\"packet-io-engine\",dev!~\"virtio_dign_(.*)\"}[5m])",
|
||||
"Condition":"[{\"id\":2,\"weight\":2,\"operator\":\">=\",\"value\":\"160000000000\"}]",
|
||||
"Extension Label":"",
|
||||
"Unit":"bits/sec(SI)",
|
||||
"Duration":"60",
|
||||
"Summary":"Physical trafific steering to high.",
|
||||
"Description":"",
|
||||
"Receiver":"",
|
||||
"Notices":"",
|
||||
"State":"1",
|
||||
"Automatic expired":"1",
|
||||
"Schedule":"0",
|
||||
"Effective times":"1,2,3,4,5,6,7",
|
||||
"Start time":"00:00",
|
||||
"End time":"23:59",
|
||||
"Active notification":"0",
|
||||
"Expired notification":"0",
|
||||
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
||||
"Data link":"{\"dataLink\":[]}"
|
||||
},
|
||||
{
|
||||
"Name":"TSG-OS packet-io rx missed",
|
||||
"Type":"1",
|
||||
"Evaluation interval ":"60",
|
||||
"Timeout(s)":"300",
|
||||
"Expression/OID":"rate(phy_rx_missed_total{service=\"packet-io-engine\",dev!~\"virtio_dign_(.*)\"}[5m])",
|
||||
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\">\",\"value\":\"1000\"},{\"id\":2,\"weight\":2,\"operator\":\">\",\"value\":\"100\"},{\"id\":3,\"weight\":3,\"operator\":\">\",\"value\":\"10\"}]",
|
||||
"Extension Label":"",
|
||||
"Unit":"packets/sec(IEC)",
|
||||
"Duration":"120",
|
||||
"Summary":"Packet drops by NIC cards.",
|
||||
"Description":"",
|
||||
"Receiver":"",
|
||||
"Notices":"",
|
||||
"State":"1",
|
||||
"Automatic expired":"1",
|
||||
"Schedule":"0",
|
||||
"Effective times":"1,2,3,4,5,6,7",
|
||||
"Start time":"00:00",
|
||||
"End time":"23:59",
|
||||
"Active notification":"0",
|
||||
"Expired notification":"0",
|
||||
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
||||
"Data link":"{\"dataLink\":[]}"
|
||||
},
|
||||
{
|
||||
"Name":"TSG-OS packet-io rx error",
|
||||
"Type":"1",
|
||||
"Evaluation interval ":"60",
|
||||
"Timeout(s)":"300",
|
||||
"Expression/OID":"irate(phy_rx_error_total{service=\"packet-io-engine\",dev!~\"virtio_dign_(.*)\"}[5m])",
|
||||
"Condition":"[{\"id\":2,\"weight\":2,\"operator\":\">\",\"value\":\"0.0\"}]",
|
||||
"Extension Label":"",
|
||||
"Unit":"packets/sec(IEC)",
|
||||
"Duration":"60",
|
||||
"Summary":"Packet drops by CRC errors.",
|
||||
"Description":"",
|
||||
"Receiver":"",
|
||||
"Notices":"",
|
||||
"State":"1",
|
||||
"Automatic expired":"1",
|
||||
"Schedule":"0",
|
||||
"Effective times":"1,2,3,4,5,6,7",
|
||||
"Start time":"00:00",
|
||||
"End time":"23:59",
|
||||
"Active notification":"0",
|
||||
"Expired notification":"0",
|
||||
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
||||
"Data link":"{\"dataLink\":[]}"
|
||||
},
|
||||
{
|
||||
"Name":"TSG-OS physical component too hot",
|
||||
"Type":"1",
|
||||
"Evaluation interval ":"30",
|
||||
"Timeout(s)":"300",
|
||||
"Expression/OID":"node_hwmon_temp_celsius{service=\"prometheus-node-exporter\"}",
|
||||
"Condition":"[{\"id\":3,\"weight\":3,\"operator\":\">\",\"value\":\"90.0\"}]",
|
||||
"Extension Label":"",
|
||||
"Unit":"short",
|
||||
"Duration":"60",
|
||||
"Summary":"The physical component is overheating.",
|
||||
"Description":"Physical hardware component too hot\nVALUE = {{ $value }}\nLABELS: {{ $labels }}",
|
||||
"Receiver":"",
|
||||
"Notices":"",
|
||||
"State":"1",
|
||||
"Automatic expired":"1",
|
||||
"Schedule":"0",
|
||||
"Effective times":"1,2,3,4,5,6,7",
|
||||
"Start time":"00:00",
|
||||
"End time":"23:59",
|
||||
"Active notification":"0",
|
||||
"Expired notification":"0",
|
||||
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
||||
"Data link":"{\"dataLink\":[]}"
|
||||
},
|
||||
{
|
||||
"Name":"TSG-OS node memory pressure",
|
||||
"Type":"1",
|
||||
"Evaluation interval ":"60",
|
||||
"Timeout(s)":"300",
|
||||
"Expression/OID":"kube_node_status_condition{condition=\"MemoryPressure\",status=\"true\"}",
|
||||
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\"==\",\"value\":\"1.0\"}]",
|
||||
"Extension Label":"",
|
||||
"Unit":"short",
|
||||
"Duration":"60",
|
||||
"Summary":"Node memory pressure.",
|
||||
"Description":"{{ $labels.node }} has MemoryPressure condition\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}",
|
||||
"Receiver":"",
|
||||
"Notices":"",
|
||||
"State":"1",
|
||||
"Automatic expired":"1",
|
||||
"Schedule":"0",
|
||||
"Effective times":"1,2,3,4,5,6,7",
|
||||
"Start time":"00:00",
|
||||
"End time":"23:59",
|
||||
"Active notification":"0",
|
||||
"Expired notification":"0",
|
||||
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
||||
"Data link":"{\"dataLink\":[]}"
|
||||
},
|
||||
{
|
||||
"Name":"TSG-OS node disk pressure",
|
||||
"Type":"1",
|
||||
"Evaluation interval ":"60",
|
||||
"Timeout(s)":"300",
|
||||
"Expression/OID":"kube_node_status_condition{condition=\"DiskPressure\",status=\"true\"}",
|
||||
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\"==\",\"value\":\"1.0\"}]",
|
||||
"Extension Label":"",
|
||||
"Unit":"short",
|
||||
"Duration":"60",
|
||||
"Summary":"Node disk pressure.",
|
||||
"Description":"{{ $labels.node }} has DiskPressure condition\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}",
|
||||
"Receiver":"",
|
||||
"Notices":"",
|
||||
"State":"1",
|
||||
"Automatic expired":"1",
|
||||
"Schedule":"0",
|
||||
"Effective times":"1,2,3,4,5,6,7",
|
||||
"Start time":"00:00",
|
||||
"End time":"23:59",
|
||||
"Active notification":"0",
|
||||
"Expired notification":"0",
|
||||
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
||||
"Data link":"{\"dataLink\":[]}"
|
||||
},
|
||||
{
|
||||
"Name":"TSG-OS host high cpu load",
|
||||
"Type":"1",
|
||||
"Evaluation interval ":"30",
|
||||
"Timeout(s)":"300",
|
||||
"Expression/OID":"100 - (avg by(instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100) > 80",
|
||||
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\">\",\"value\":\"80.0\"}]",
|
||||
"Extension Label":"",
|
||||
"Unit":"percent(0-100)",
|
||||
"Duration":"60",
|
||||
"Summary":"The host is experiencing high CPU load.",
|
||||
"Description":"CPU load is > 80%\nVALUE = {{ $value }}\nLABELS = {{ $labels }}",
|
||||
"Receiver":"",
|
||||
"Notices":"",
|
||||
"State":"1",
|
||||
"Automatic expired":"1",
|
||||
"Schedule":"0",
|
||||
"Effective times":"1,2,3,4,5,6,7",
|
||||
"Start time":"00:00",
|
||||
"End time":"23:59",
|
||||
"Active notification":"0",
|
||||
"Expired notification":"0",
|
||||
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
||||
"Data link":"{\"dataLink\":[]}"
|
||||
},
|
||||
{
|
||||
"Name":"TSG-OS packet-io rx drop",
|
||||
"Type":"1",
|
||||
"Evaluation interval ":"60",
|
||||
"Timeout(s)":"300",
|
||||
"Expression/OID":"irate(rx_drops_total{service=\"packet-io-engine\"}[5m])",
|
||||
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\">\",\"value\":\"2000\"},{\"id\":2,\"weight\":2,\"operator\":\">\",\"value\":\"1000\"},{\"id\":3,\"weight\":3,\"operator\":\">\",\"value\":\"200\"}]",
|
||||
"Extension Label":"",
|
||||
"Unit":"packets/sec(IEC)",
|
||||
"Duration":"180",
|
||||
"Summary":"Packet drops by application for the poor performance.",
|
||||
"Description":"",
|
||||
"Receiver":"",
|
||||
"Notices":"",
|
||||
"State":"1",
|
||||
"Automatic expired":"1",
|
||||
"Schedule":"0",
|
||||
"Effective times":"1,2,3,4,5,6,7",
|
||||
"Start time":"00:00",
|
||||
"End time":"23:59",
|
||||
"Active notification":"0",
|
||||
"Expired notification":"0",
|
||||
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
||||
"Data link":"{\"dataLink\":[]}"
|
||||
},
|
||||
{
|
||||
"Name":"TSG-OS packet-io tx missed",
|
||||
"Type":"1",
|
||||
"Evaluation interval ":"60",
|
||||
"Timeout(s)":"300",
|
||||
"Expression/OID":"irate(phy_tx_error_total{service=\"packet-io-engine\"}[5m])",
|
||||
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\">\",\"value\":\"500\"},{\"id\":2,\"weight\":2,\"operator\":\">\",\"value\":\"50\"},{\"id\":3,\"weight\":3,\"operator\":\">\",\"value\":\"0\"}]",
|
||||
"Extension Label":"",
|
||||
"Unit":"packets/sec(IEC)",
|
||||
"Duration":"60",
|
||||
"Summary":"Packet I/O TX missed.",
|
||||
"Description":"",
|
||||
"Receiver":"",
|
||||
"Notices":"",
|
||||
"State":"1",
|
||||
"Automatic expired":"1",
|
||||
"Schedule":"0",
|
||||
"Effective times":"1,2,3,4,5,6,7",
|
||||
"Start time":"00:00",
|
||||
"End time":"23:59",
|
||||
"Active notification":"0",
|
||||
"Expired notification":"0",
|
||||
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
||||
"Data link":"{\"dataLink\":[]}"
|
||||
},
|
||||
{
|
||||
"Name":"TSG-OS packet-io tx drops",
|
||||
"Type":"1",
|
||||
"Evaluation interval ":"30",
|
||||
"Timeout(s)":"300",
|
||||
"Expression/OID":"irate(tx_drops_total{service=\"packet-io-engine\"}[5m]) + irate(ftx_missed_total{service=\"packet-io-engine\"}[5m])",
|
||||
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\">\",\"value\":\"3000\"},{\"id\":2,\"weight\":2,\"operator\":\">\",\"value\":\"1000\"},{\"id\":3,\"weight\":3,\"operator\":\">\",\"value\":\"50\"}]",
|
||||
"Extension Label":"",
|
||||
"Unit":"short",
|
||||
"Duration":"180",
|
||||
"Summary":"Packet TX Drops by applicaton poor performance.",
|
||||
"Description":"",
|
||||
"Receiver":"",
|
||||
"Notices":"",
|
||||
"State":"1",
|
||||
"Automatic expired":"1",
|
||||
"Schedule":"0",
|
||||
"Effective times":"1,2,3,4,5,6,7",
|
||||
"Start time":"00:00",
|
||||
"End time":"23:59",
|
||||
"Active notification":"0",
|
||||
"Expired notification":"0",
|
||||
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
||||
"Data link":"{\"dataLink\":[]}"
|
||||
}
|
||||
]
|
||||
Reference in New Issue
Block a user