2024-02-27 18:14:13 +08:00
|
|
|
[
|
|
|
|
|
{
|
|
|
|
|
"Name":"TSG-OS container restart",
|
|
|
|
|
"Type":"1",
|
|
|
|
|
"Evaluation interval ":"30",
|
|
|
|
|
"Timeout(s)":"300",
|
|
|
|
|
"Expression/OID":"increase(kube_pod_container_status_restarts_total{container=~\"firewall|packet-io-engine|^telegraf.*|certstore|proxy|sce|bfdd|shaping\"}[5m])",
|
|
|
|
|
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\">\",\"value\":\"0\"}]",
|
|
|
|
|
"Extension Label":"",
|
|
|
|
|
"Unit":"short",
|
|
|
|
|
"Duration":"1",
|
2024-06-27 15:47:09 +08:00
|
|
|
"Summary":"The container {{ $labels.container }} restarted.",
|
2024-02-27 18:14:13 +08:00
|
|
|
"Description":"",
|
|
|
|
|
"Receiver":"",
|
|
|
|
|
"Notices":"",
|
|
|
|
|
"State":"1",
|
|
|
|
|
"Automatic expired":"1",
|
|
|
|
|
"Schedule":"0",
|
|
|
|
|
"Effective times":"1,2,3,4,5,6,7",
|
|
|
|
|
"Start time":"00:00",
|
|
|
|
|
"End time":"23:59",
|
|
|
|
|
"Active notification":"0",
|
|
|
|
|
"Expired notification":"0",
|
|
|
|
|
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
|
|
|
|
"Data link":"{\"dataLink\":[]}"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"Name":"TSG-OS traffic steering too high",
|
|
|
|
|
"Type":"1",
|
|
|
|
|
"Evaluation interval ":"60",
|
|
|
|
|
"Timeout(s)":"300",
|
|
|
|
|
"Expression/OID":"irate(phy_rx_bits_total{service=\"packet-io-engine\",dev!~\"virtio_dign_(.*)\"}[5m])",
|
|
|
|
|
"Condition":"[{\"id\":2,\"weight\":2,\"operator\":\">=\",\"value\":\"160000000000\"}]",
|
|
|
|
|
"Extension Label":"",
|
|
|
|
|
"Unit":"bits/sec(SI)",
|
|
|
|
|
"Duration":"60",
|
|
|
|
|
"Summary":"Physical traffic steering too high.",
|
|
|
|
|
"Description":"",
|
|
|
|
|
"Receiver":"",
|
|
|
|
|
"Notices":"",
|
|
|
|
|
"State":"1",
|
|
|
|
|
"Automatic expired":"1",
|
|
|
|
|
"Schedule":"0",
|
|
|
|
|
"Effective times":"1,2,3,4,5,6,7",
|
|
|
|
|
"Start time":"00:00",
|
|
|
|
|
"End time":"23:59",
|
|
|
|
|
"Active notification":"0",
|
|
|
|
|
"Expired notification":"0",
|
|
|
|
|
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
|
|
|
|
"Data link":"{\"dataLink\":[]}"
|
|
|
|
|
},
|
|
|
|
|
{
|
2024-06-27 15:47:09 +08:00
|
|
|
"Name":"TSG-OS packet io nic rx drops (missed)",
|
2024-02-27 18:14:13 +08:00
|
|
|
"Type":"1",
|
|
|
|
|
"Evaluation interval ":"60",
|
|
|
|
|
"Timeout(s)":"300",
|
2024-06-27 15:47:09 +08:00
|
|
|
"Expression/OID":"irate(phy_rx_missed_total{service=\"packet-io-engine\",dev!~\"virtio_dign_(.*)\"}[5m])",
|
2024-02-27 18:14:13 +08:00
|
|
|
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\">\",\"value\":\"1000\"},{\"id\":2,\"weight\":2,\"operator\":\">\",\"value\":\"100\"},{\"id\":3,\"weight\":3,\"operator\":\">\",\"value\":\"10\"}]",
|
|
|
|
|
"Extension Label":"",
|
|
|
|
|
"Unit":"packets/sec(IEC)",
|
|
|
|
|
"Duration":"120",
|
2024-06-27 15:47:09 +08:00
|
|
|
"Summary":"The nic rx drops due to missed by packet io.",
|
2024-02-27 18:14:13 +08:00
|
|
|
"Description":"",
|
|
|
|
|
"Receiver":"",
|
|
|
|
|
"Notices":"",
|
|
|
|
|
"State":"1",
|
|
|
|
|
"Automatic expired":"1",
|
|
|
|
|
"Schedule":"0",
|
|
|
|
|
"Effective times":"1,2,3,4,5,6,7",
|
|
|
|
|
"Start time":"00:00",
|
|
|
|
|
"End time":"23:59",
|
|
|
|
|
"Active notification":"0",
|
|
|
|
|
"Expired notification":"0",
|
|
|
|
|
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
|
|
|
|
"Data link":"{\"dataLink\":[]}"
|
|
|
|
|
},
|
|
|
|
|
{
|
2024-06-27 15:47:09 +08:00
|
|
|
"Name":"TSG-OS packet io nic rx error",
|
2024-02-27 18:14:13 +08:00
|
|
|
"Type":"1",
|
|
|
|
|
"Evaluation interval ":"60",
|
|
|
|
|
"Timeout(s)":"300",
|
|
|
|
|
"Expression/OID":"irate(phy_rx_error_total{service=\"packet-io-engine\",dev!~\"virtio_dign_(.*)\"}[5m])",
|
|
|
|
|
"Condition":"[{\"id\":2,\"weight\":2,\"operator\":\">\",\"value\":\"0.0\"}]",
|
|
|
|
|
"Extension Label":"",
|
|
|
|
|
"Unit":"packets/sec(IEC)",
|
|
|
|
|
"Duration":"60",
|
2024-06-27 15:47:09 +08:00
|
|
|
"Summary":"The nic rx error by packet io.",
|
2024-02-27 18:14:13 +08:00
|
|
|
"Description":"",
|
|
|
|
|
"Receiver":"",
|
|
|
|
|
"Notices":"",
|
|
|
|
|
"State":"1",
|
|
|
|
|
"Automatic expired":"1",
|
|
|
|
|
"Schedule":"0",
|
|
|
|
|
"Effective times":"1,2,3,4,5,6,7",
|
|
|
|
|
"Start time":"00:00",
|
|
|
|
|
"End time":"23:59",
|
|
|
|
|
"Active notification":"0",
|
|
|
|
|
"Expired notification":"0",
|
|
|
|
|
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
|
|
|
|
"Data link":"{\"dataLink\":[]}"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"Name":"TSG-OS physical component too hot",
|
|
|
|
|
"Type":"1",
|
|
|
|
|
"Evaluation interval ":"30",
|
|
|
|
|
"Timeout(s)":"300",
|
|
|
|
|
"Expression/OID":"node_hwmon_temp_celsius{service=\"prometheus-node-exporter\"}",
|
|
|
|
|
"Condition":"[{\"id\":3,\"weight\":3,\"operator\":\">\",\"value\":\"90.0\"}]",
|
|
|
|
|
"Extension Label":"",
|
|
|
|
|
"Unit":"short",
|
|
|
|
|
"Duration":"60",
|
|
|
|
|
"Summary":"The physical component is overheating.",
|
|
|
|
|
"Description":"Physical hardware component too hot\nVALUE = {{ $value }}\nLABELS: {{ $labels }}",
|
|
|
|
|
"Receiver":"",
|
|
|
|
|
"Notices":"",
|
|
|
|
|
"State":"1",
|
|
|
|
|
"Automatic expired":"1",
|
|
|
|
|
"Schedule":"0",
|
|
|
|
|
"Effective times":"1,2,3,4,5,6,7",
|
|
|
|
|
"Start time":"00:00",
|
|
|
|
|
"End time":"23:59",
|
|
|
|
|
"Active notification":"0",
|
|
|
|
|
"Expired notification":"0",
|
|
|
|
|
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
|
|
|
|
"Data link":"{\"dataLink\":[]}"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"Name":"TSG-OS node memory pressure",
|
|
|
|
|
"Type":"1",
|
|
|
|
|
"Evaluation interval ":"60",
|
|
|
|
|
"Timeout(s)":"300",
|
|
|
|
|
"Expression/OID":"kube_node_status_condition{condition=\"MemoryPressure\",status=\"true\"}",
|
|
|
|
|
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\"==\",\"value\":\"1.0\"}]",
|
|
|
|
|
"Extension Label":"",
|
|
|
|
|
"Unit":"short",
|
|
|
|
|
"Duration":"60",
|
|
|
|
|
"Summary":"Node memory pressure.",
|
|
|
|
|
"Description":"{{ $labels.node }} has MemoryPressure condition\nVALUE = {{ $value }}\nLABELS = {{ $labels }}",
|
|
|
|
|
"Receiver":"",
|
|
|
|
|
"Notices":"",
|
|
|
|
|
"State":"1",
|
|
|
|
|
"Automatic expired":"1",
|
|
|
|
|
"Schedule":"0",
|
|
|
|
|
"Effective times":"1,2,3,4,5,6,7",
|
|
|
|
|
"Start time":"00:00",
|
|
|
|
|
"End time":"23:59",
|
|
|
|
|
"Active notification":"0",
|
|
|
|
|
"Expired notification":"0",
|
|
|
|
|
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
|
|
|
|
"Data link":"{\"dataLink\":[]}"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"Name":"TSG-OS node disk pressure",
|
|
|
|
|
"Type":"1",
|
|
|
|
|
"Evaluation interval ":"60",
|
|
|
|
|
"Timeout(s)":"300",
|
|
|
|
|
"Expression/OID":"kube_node_status_condition{condition=\"DiskPressure\",status=\"true\"}",
|
|
|
|
|
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\"==\",\"value\":\"1.0\"}]",
|
|
|
|
|
"Extension Label":"",
|
|
|
|
|
"Unit":"short",
|
|
|
|
|
"Duration":"60",
|
|
|
|
|
"Summary":"Node disk pressure.",
|
|
|
|
|
"Description":"{{ $labels.node }} has DiskPressure condition\nVALUE = {{ $value }}\nLABELS = {{ $labels }}",
|
|
|
|
|
"Receiver":"",
|
|
|
|
|
"Notices":"",
|
|
|
|
|
"State":"1",
|
|
|
|
|
"Automatic expired":"1",
|
|
|
|
|
"Schedule":"0",
|
|
|
|
|
"Effective times":"1,2,3,4,5,6,7",
|
|
|
|
|
"Start time":"00:00",
|
|
|
|
|
"End time":"23:59",
|
|
|
|
|
"Active notification":"0",
|
|
|
|
|
"Expired notification":"0",
|
|
|
|
|
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
|
|
|
|
"Data link":"{\"dataLink\":[]}"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"Name":"TSG-OS host high cpu load",
|
|
|
|
|
"Type":"1",
|
|
|
|
|
"Evaluation interval ":"30",
|
|
|
|
|
"Timeout(s)":"300",
|
|
|
|
|
"Expression/OID":"100 - (avg by(instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100) > 80",
|
|
|
|
|
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\">\",\"value\":\"80.0\"}]",
|
|
|
|
|
"Extension Label":"",
|
|
|
|
|
"Unit":"percent(0-100)",
|
|
|
|
|
"Duration":"60",
|
|
|
|
|
"Summary":"The host is experiencing high CPU load.",
|
|
|
|
|
"Description":"CPU load is > 80%\nVALUE = {{ $value }}\nLABELS = {{ $labels }}",
|
|
|
|
|
"Receiver":"",
|
|
|
|
|
"Notices":"",
|
|
|
|
|
"State":"1",
|
|
|
|
|
"Automatic expired":"1",
|
|
|
|
|
"Schedule":"0",
|
|
|
|
|
"Effective times":"1,2,3,4,5,6,7",
|
|
|
|
|
"Start time":"00:00",
|
|
|
|
|
"End time":"23:59",
|
|
|
|
|
"Active notification":"0",
|
|
|
|
|
"Expired notification":"0",
|
|
|
|
|
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
|
|
|
|
"Data link":"{\"dataLink\":[]}"
|
|
|
|
|
},
|
|
|
|
|
{
|
2024-06-27 15:47:09 +08:00
|
|
|
"Name":"TSG-OS traffic engines rx drops",
|
2024-02-27 18:14:13 +08:00
|
|
|
"Type":"1",
|
|
|
|
|
"Evaluation interval ":"60",
|
|
|
|
|
"Timeout(s)":"300",
|
|
|
|
|
"Expression/OID":"irate(rx_drops_total{service=\"packet-io-engine\"}[5m])",
|
|
|
|
|
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\">\",\"value\":\"2000\"},{\"id\":2,\"weight\":2,\"operator\":\">\",\"value\":\"1000\"},{\"id\":3,\"weight\":3,\"operator\":\">\",\"value\":\"200\"}]",
|
|
|
|
|
"Extension Label":"",
|
|
|
|
|
"Unit":"packets/sec(IEC)",
|
|
|
|
|
"Duration":"180",
|
2024-06-27 15:47:09 +08:00
|
|
|
"Summary":"The packet rx drops by traffic engines.",
|
2024-02-27 18:14:13 +08:00
|
|
|
"Description":"",
|
|
|
|
|
"Receiver":"",
|
|
|
|
|
"Notices":"",
|
|
|
|
|
"State":"1",
|
|
|
|
|
"Automatic expired":"1",
|
|
|
|
|
"Schedule":"0",
|
|
|
|
|
"Effective times":"1,2,3,4,5,6,7",
|
|
|
|
|
"Start time":"00:00",
|
|
|
|
|
"End time":"23:59",
|
|
|
|
|
"Active notification":"0",
|
|
|
|
|
"Expired notification":"0",
|
|
|
|
|
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
|
|
|
|
"Data link":"{\"dataLink\":[]}"
|
|
|
|
|
},
|
|
|
|
|
{
|
2024-06-27 15:47:09 +08:00
|
|
|
"Name":"TSG-OS packet io nic tx error",
|
2024-02-27 18:14:13 +08:00
|
|
|
"Type":"1",
|
|
|
|
|
"Evaluation interval ":"60",
|
|
|
|
|
"Timeout(s)":"300",
|
2024-06-27 15:47:09 +08:00
|
|
|
"Expression/OID":"irate(phy_tx_error_total{service=\"packet-io-engine\", dev!~\"virtio_dign_(.*)\"}[5m])",
|
2024-02-27 18:14:13 +08:00
|
|
|
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\">\",\"value\":\"500\"},{\"id\":2,\"weight\":2,\"operator\":\">\",\"value\":\"50\"},{\"id\":3,\"weight\":3,\"operator\":\">\",\"value\":\"0\"}]",
|
|
|
|
|
"Extension Label":"",
|
|
|
|
|
"Unit":"packets/sec(IEC)",
|
|
|
|
|
"Duration":"60",
|
2024-06-27 15:47:09 +08:00
|
|
|
"Summary":"The nic tx error by packet io.",
|
2024-02-27 18:14:13 +08:00
|
|
|
"Description":"",
|
|
|
|
|
"Receiver":"",
|
|
|
|
|
"Notices":"",
|
|
|
|
|
"State":"1",
|
|
|
|
|
"Automatic expired":"1",
|
|
|
|
|
"Schedule":"0",
|
|
|
|
|
"Effective times":"1,2,3,4,5,6,7",
|
|
|
|
|
"Start time":"00:00",
|
|
|
|
|
"End time":"23:59",
|
|
|
|
|
"Active notification":"0",
|
|
|
|
|
"Expired notification":"0",
|
|
|
|
|
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
|
|
|
|
"Data link":"{\"dataLink\":[]}"
|
|
|
|
|
},
|
|
|
|
|
{
|
2024-06-27 15:47:09 +08:00
|
|
|
"Name":"TSG-OS traffic engines tx drops",
|
2024-02-27 18:14:13 +08:00
|
|
|
"Type":"1",
|
|
|
|
|
"Evaluation interval ":"30",
|
|
|
|
|
"Timeout(s)":"300",
|
|
|
|
|
"Expression/OID":"irate(tx_drops_total{service=\"packet-io-engine\"}[5m]) + irate(ftx_missed_total{service=\"packet-io-engine\"}[5m])",
|
|
|
|
|
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\">\",\"value\":\"3000\"},{\"id\":2,\"weight\":2,\"operator\":\">\",\"value\":\"1000\"},{\"id\":3,\"weight\":3,\"operator\":\">\",\"value\":\"50\"}]",
|
|
|
|
|
"Extension Label":"",
|
|
|
|
|
"Unit":"short",
|
|
|
|
|
"Duration":"180",
|
2024-06-27 15:47:09 +08:00
|
|
|
"Summary":"The packet tx drops by traffic engines.",
|
|
|
|
|
"Description":"",
|
|
|
|
|
"Receiver":"",
|
|
|
|
|
"Notices":"",
|
|
|
|
|
"State":"1",
|
|
|
|
|
"Automatic expired":"1",
|
|
|
|
|
"Schedule":"0",
|
|
|
|
|
"Effective times":"1,2,3,4,5,6,7",
|
|
|
|
|
"Start time":"00:00",
|
|
|
|
|
"End time":"23:59",
|
|
|
|
|
"Active notification":"0",
|
|
|
|
|
"Expired notification":"0",
|
|
|
|
|
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
|
|
|
|
"Data link":"{\"dataLink\":[]}"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"Name":"TSG-OS packet io nic rx drops (nobuf)",
|
|
|
|
|
"Type":"1",
|
|
|
|
|
"Evaluation interval ":"60",
|
|
|
|
|
"Timeout(s)":"300",
|
|
|
|
|
"Expression/OID":"irate(phy_rx_nobuf_total{service=\"packet-io-engine\",dev!~\"virtio_dign_(.*)\"}[5m])",
|
|
|
|
|
"Condition":"[{\"id\":1,\"weight\":1,\"operator\":\">\",\"value\":\"1000\"},{\"id\":2,\"weight\":2,\"operator\":\">\",\"value\":\"100\"},{\"id\":3,\"weight\":3,\"operator\":\">\",\"value\":\"10\"}]",
|
|
|
|
|
"Extension Label":"",
|
|
|
|
|
"Unit":"packets/sec(IEC)",
|
|
|
|
|
"Duration":"120",
|
|
|
|
|
"Summary":"The nic rx drops due to nobuf by packet io.",
|
2024-02-27 18:14:13 +08:00
|
|
|
"Description":"",
|
|
|
|
|
"Receiver":"",
|
|
|
|
|
"Notices":"",
|
|
|
|
|
"State":"1",
|
|
|
|
|
"Automatic expired":"1",
|
|
|
|
|
"Schedule":"0",
|
|
|
|
|
"Effective times":"1,2,3,4,5,6,7",
|
|
|
|
|
"Start time":"00:00",
|
|
|
|
|
"End time":"23:59",
|
|
|
|
|
"Active notification":"0",
|
|
|
|
|
"Expired notification":"0",
|
|
|
|
|
"Trouble shooting":"<div class=\"editor-core ql-container ql-snow\"><div class=\"ql-editor\"><p><br></p></div></div>",
|
|
|
|
|
"Data link":"{\"dataLink\":[]}"
|
|
|
|
|
}
|
|
|
|
|
]
|