#!/bin/bash
#
# Collects Druid indexing-service metrics (task counts, supervisor states and
# an index-task health flag) and writes them in Prometheus text exposition
# format to a .prom file.
#
# Requires: curl, jq.

# Host and port of the Druid API that serves /druid/indexer/v1/...
druid_ip=127.0.0.1
druid_port=8089

# Final location of the metrics file.
# NOTE(review): the path suggests it is scraped by node-exporter's textfile
# collector - confirm.
druid_monitor_prom_target=/opt/tsg/olap/node-exporter/prom/druid_metrics.prom

# Supervisor spec files, one path per line.
# NOTE(review): the directory is relative to the current working directory,
# not to this script - confirm the script is always started from the expected
# directory (e.g. when run from cron).
task_name=$(find "../topology/tasks" -type f -name "*.json")

# Moves the finished temp file over the published metrics file. rename() is
# atomic, so a reader never sees a half-written file. mktemp creates the file
# with mode 0600, hence the chmod so that other users can still read it.
function publishDruidMetrics() {
  if ! { chmod 644 "$druid_monitor_prom_file" \
    && mv -f "$druid_monitor_prom_file" "$druid_monitor_prom_target"; }; then
    echo "failed to publish $druid_monitor_prom_target" >&2
    rm -f "$druid_monitor_prom_file"
  fi
}

# Metrics are collected in a fresh temp file next to the target (same
# filesystem, name does not end in .prom) and published when the script exits.
# Every function below appends to $druid_monitor_prom_file.
if druid_monitor_prom_file=$(mktemp "${druid_monitor_prom_target}.XXXXXX" 2>/dev/null); then
  trap publishDruidMetrics EXIT
else
  # No temp file available: fall back to writing the target in place.
  # Clear the previous metrics file to avoid appending issues.
  druid_monitor_prom_file=$druid_monitor_prom_target
  : > "$druid_monitor_prom_file"
fi

# Function to retrieve supervisor status information
#
# Fetches the supervisor list (with state) once from the Druid indexer API and
# appends two gauges to $druid_monitor_prom_file:
#   druid_supervisor_healthy_num   - supervisors whose state is "RUNNING"
#   druid_supervisor_unhealthy_num - supervisors in any other state
#
# Globals read:    druid_ip, druid_port, druid_monitor_prom_file
# Globals written: druid_supervisor_healthy_num, druid_supervisor_unhealthy_num
#                  (left empty when the value could not be obtained)

# Appends HELP, TYPE and sample lines for one gauge.
# Arguments: $1 - metric name, $2 - help text, $3 - value
# A non-integer value is skipped entirely: a sample line without a value is
# not valid exposition format.
function _writeSupervisorGauge() {
  local name=$1 help=$2 value=$3

  if [[ "$value" =~ ^[0-9]+$ ]]; then
    {
      echo "# HELP $name $help"
      echo "# TYPE $name gauge"
      echo "$name $value"
    } >> "$druid_monitor_prom_file"
  else
    echo "getSupervisorStatus: no value for $name, metric skipped" >&2
  fi
}

function getSupervisorStatus() {
  local supervisors=

  druid_supervisor_healthy_num=
  druid_supervisor_unhealthy_num=

  # One request feeds both counts, so they always describe the same snapshot.
  # -f makes curl fail on HTTP errors instead of passing an error body to jq.
  if supervisors=$(curl -G -d 'state=true' -s -f \
    "$druid_ip:$druid_port/druid/indexer/v1/supervisor"); then
    # jq's own diagnostics are dropped; a failed parse leaves the value empty
    # and is reported by _writeSupervisorGauge.
    druid_supervisor_healthy_num=$(jq '[ .[] | select(.state == "RUNNING")] | length' \
      <<< "$supervisors" 2>/dev/null)
    druid_supervisor_unhealthy_num=$(jq '[ .[] | select(.state != "RUNNING")] | length' \
      <<< "$supervisors" 2>/dev/null)
  fi

  _writeSupervisorGauge druid_supervisor_healthy_num \
    "Number of healthy supervisors in Druid." "$druid_supervisor_healthy_num"
  _writeSupervisorGauge druid_supervisor_unhealthy_num \
    "Number of unhealthy supervisors in Druid." "$druid_supervisor_unhealthy_num"
}

# Function to retrieve Druid task status information
#
# For every "metric task_type task_state" entry in the table below, requests
# /druid/indexer/v1/<task_state>Tasks?type=<task_type> and appends the length
# of the returned list as a gauge to $druid_monitor_prom_file.
#
# Globals read:    druid_ip, druid_port, druid_monitor_prom_file
# Globals written: druid_index_running_task_num, druid_index_waiting_task_num,
#                  druid_index_pending_task_num, druid_compact_pending_task_num,
#                  druid_compact_waiting_task_num
#                  (each left empty when its count could not be obtained)
function getTaskStatus() {
  local spec metric task_type task_state count

  for spec in \
    'druid_index_running_task_num index_kafka running' \
    'druid_index_waiting_task_num index_kafka waiting' \
    'druid_index_pending_task_num index_kafka pending' \
    'druid_compact_pending_task_num compact pending' \
    'druid_compact_waiting_task_num compact waiting'; do
    read -r metric task_type task_state <<< "$spec"

    # -f makes curl emit nothing on HTTP errors; the type check keeps a JSON
    # object (e.g. an error document) from being counted by its keys. jq's
    # diagnostics are dropped because the failure is reported below.
    count=$(curl -G -d "type=${task_type}" -s -f \
      "$druid_ip:$druid_port/druid/indexer/v1/${task_state}Tasks" \
      | jq 'if type == "array" then length else empty end' 2>/dev/null)
    [[ "$count" =~ ^[0-9]+$ ]] || count=

    # Publish the count under the metric's name as a global variable.
    printf -v "$metric" '%s' "$count"

    if [[ -z "$count" ]]; then
      # A sample line without a value is not valid exposition format, so the
      # whole metric is left out instead.
      echo "getTaskStatus: no ${task_state} ${task_type} task count, ${metric} skipped" >&2
      continue
    fi

    {
      echo "# HELP ${metric} Number of ${task_state} ${task_type} tasks in Druid."
      echo "# TYPE ${metric} gauge"
      echo "${metric} ${count}"
    } >> "$druid_monitor_prom_file"
  done
}

# Function to check if the running index task count matches the specified count in the configuration
|
|
# If the running count is greater than or equal to the configured count, it is considered healthy (1), otherwise unhealthy (0)
|
|
function checkIndexStatus() {
|
|
druid_task_sum=0
|
|
for var in ${task_name[@]}; do
|
|
druid_task_num=$(cat ${var} | jq .ioConfig.taskCount)
|
|
druid_task_sum=$(expr $druid_task_num + $druid_task_sum)
|
|
done
|
|
|
|
druid_index_task_healthy_flag=1
|
|
if [ $druid_index_running_task_num -lt $druid_task_sum ]; then
|
|
druid_index_task_healthy_flag=0
|
|
fi
|
|
|
|
echo "# HELP druid_index_task_healthy_flag Health flag for index tasks in Druid (1 = healthy, 0 = unhealthy)." >> $druid_monitor_prom_file
|
|
echo "# TYPE druid_index_task_healthy_flag gauge" >> $druid_monitor_prom_file
|
|
echo druid_index_task_healthy_flag $druid_index_task_healthy_flag >> $druid_monitor_prom_file
|
|
}
|
|
|
|
# Call the functions to collect and write the metrics.
# getTaskStatus has to run before checkIndexStatus, which reads the
# druid_index_running_task_num value that getTaskStatus sets.
getTaskStatus
getSupervisorStatus
checkIndexStatus