优化ClickHouse/Druid系统监控指标
This commit is contained in:
5
clickhouse/monitor/README.md
Normal file
5
clickhouse/monitor/README.md
Normal file
@@ -0,0 +1,5 @@
|
||||
## Description
|
||||
- run_ck_monitor:放置/etc/cron.d目录下,每分钟执行一次
|
||||
- clickhouse_exporter.sh:输出ClickHouse系统监控指标,部署至每一台节点
|
||||
- 放置目录/opt/tsg/olap/clickhouse/monitor
|
||||
- 输出Metrics至/opt/tsg/olap/node-exporter/prom/clickhouse_metrics.prom文件
|
||||
55
clickhouse/monitor/clickhouse_exporter.sh
Executable file
55
clickhouse/monitor/clickhouse_exporter.sh
Executable file
@@ -0,0 +1,55 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Configuration
# The CK_* settings and METRICS_FILE may be overridden from the environment;
# the values below are the defaults used when a variable is unset or empty.
CK_USER="${CK_USER:-default}" # Change to your own ClickHouse username
# NOTE(review): a default password is still embedded in this script. Prefer
# supplying CK_PIN from the environment so the secret is not kept in source.
CK_PIN="${CK_PIN:-galaxy2019}"
CK_HOST="${CK_HOST:-127.0.0.1}"
CK_PORT="${CK_PORT:-9001}"
# File the Prometheus-format metrics are written to.
METRICS_FILE="${METRICS_FILE:-/opt/tsg/olap/node-exporter/prom/clickhouse_metrics.prom}"
# Per-query limit, passed to clickhouse-client as --max_execution_time.
TIMEOUT=100
|
||||
|
||||
# Run one SQL statement through clickhouse-client.
# Globals:   CK_HOST, CK_PORT, CK_USER, CK_PIN, TIMEOUT (read)
# Arguments: $1 - SQL text handed to clickhouse-client via --query
# Outputs:   whatever clickhouse-client prints
# Returns:   the exit status of clickhouse-client
query_clickhouse() {
  clickhouse-client \
    -h "${CK_HOST}" \
    --port "${CK_PORT}" \
    -m \
    -u "${CK_USER}" \
    --password "${CK_PIN}" \
    --max_execution_time="${TIMEOUT}" \
    --query="${1}"
}
|
||||
|
||||
# Collect the ClickHouse metrics and publish them to $METRICS_FILE.
#
# The output is assembled in a temporary file next to $METRICS_FILE and then
# renamed over it, so a reader never observes a truncated or half-written
# file. The temporary name does not end in ".prom"; the node-exporter textfile
# collector is documented to read only *.prom files.
# The file is created by plain redirection (not mktemp) so that it keeps the
# umask-derived permissions the original "> $METRICS_FILE" produced.
tmp_metrics_file="${METRICS_FILE}.$$.tmp"
trap 'rm -f -- "$tmp_metrics_file"' EXIT

# Print one gauge (HELP, TYPE and sample line) in Prometheus text format.
#   $1 - metric name
#   $2 - HELP text
#   $3 - sample value; must consist of decimal digits only
# A value that is empty or not an integer (e.g. because the query failed or
# timed out) is skipped with a warning on stderr instead of being written:
# a sample line without a valid value makes the whole file unparseable.
# Always returns 0 so that a skipped metric does not abort the run.
write_metric() {
  case "$3" in
    '' | *[!0-9]*)
      printf 'clickhouse_exporter: skipping %s: invalid value "%s"\n' "$1" "$3" >&2
      return 0
      ;;
  esac
  printf '# HELP %s %s\n# TYPE %s gauge\n%s %s\n' "$1" "$2" "$1" "$1" "$3"
}

# Check if ClickHouse service is running
if pgrep -f "clickhouse-server/config.xml" > /dev/null; then
  clickhouse_up=1

  # Record start time
  start_time=$(date +%s)

  # Query metrics
  process_count=$(query_clickhouse "SELECT count(*) FROM system.processes")
  merge_count=$(query_clickhouse "SELECT count(*) FROM system.merges")
  expired_parts_count=$(query_clickhouse "SELECT count() FROM system.parts WHERE delete_ttl_info_max < (now() - INTERVAL 3 DAY) AND delete_ttl_info_max > toDateTime('2000-01-01 00:00:00')")

  # Record end time and calculate execution time
  end_time=$(date +%s)
  execution_time=$((end_time - start_time))
else
  clickhouse_up=0
fi

# Write Prometheus format metrics to the temporary file
{
  write_metric clickhouse_up "ClickHouse service status (1=up, 0=down)" "$clickhouse_up"
  if [ "$clickhouse_up" -eq 1 ]; then
    write_metric ck_processes_count "Number of running processes in ClickHouse" "$process_count"
    write_metric ck_merges_count "Number of running merges in ClickHouse" "$merge_count"
    write_metric ck_connect_time "Time taken to connect and run queries in seconds" "$execution_time"
    write_metric ck_expired_parts_count "Number of expired parts in ClickHouse" "$expired_parts_count"
  fi
} > "$tmp_metrics_file" || {
  printf 'clickhouse_exporter: cannot write %s\n' "$tmp_metrics_file" >&2
  exit 1
}

# Publish the complete file in a single rename.
mv -f -- "$tmp_metrics_file" "$METRICS_FILE" || exit 1

# Exit the script with a failure status if ClickHouse is not running
[ "$clickhouse_up" -eq 1 ] || exit 1
|
||||
|
||||
3
clickhouse/monitor/run_ck_monitor
Normal file
3
clickhouse/monitor/run_ck_monitor
Normal file
@@ -0,0 +1,3 @@
|
||||
#Ansible: Check Clickhouse up to node_exporter
|
||||
# Run the exporter with the interpreter named in its shebang (#!/bin/bash).
*/1 * * * * root cd /opt/tsg/olap/clickhouse/monitor && /bin/bash clickhouse_exporter.sh
|
||||
|
||||
Reference in New Issue
Block a user