利用NOAH的自定义脚本监控功能,写了一个脚本监控kafka的consumer offset lag,如果大于10000就报警。

脚本如下:

#!/bin/bash
#
# Report Kafka consumer-group offset lag in a form that a text-matching
# monitor can alarm on. For every (group, topic) pair one line is printed:
#
#     kafkaOffsetLag-<group>-<topic>:LARGE|SMALL
#
# LARGE means the lag summed over all partitions of that topic is strictly
# greater than the threshold; otherwise SMALL is printed.
#
# Optional environment overrides (the defaults are the values that used to be
# hard-coded in this script):
#   KAFKA_CONSUMER_GROUPS_CMD  path of kafka-consumer-groups.sh
#   KAFKA_BOOTSTRAP_SERVER     address passed to --bootstrap-server
#   LAG_THRESHOLD              alarm threshold (number of messages)

KAFKA_CONSUMER_GROUPS_CMD="${KAFKA_CONSUMER_GROUPS_CMD:-/home/work/kafka/bin/kafka-consumer-groups.sh}"
KAFKA_BOOTSTRAP_SERVER="${KAFKA_BOOTSTRAP_SERVER:-localhost:8092}"
LAG_THRESHOLD="${LAG_THRESHOLD:-10000}"

#######################################
# Turn the `--describe` output of one consumer group into alarm lines.
# Expects whitespace-separated rows laid out as
#   GROUP TOPIC PARTITION CURRENT-OFFSET LOG-END-OFFSET LAG OWNER
# Globals:   LAG_THRESHOLD (read)
# Arguments: none (describe output is read from stdin)
# Outputs:   one "kafkaOffsetLag-<group>-<topic>:LARGE|SMALL" line per topic,
#            in order of first appearance
#######################################
report_group_lag() {
  awk -v threshold="$LAG_THRESHOLD" '
    # Rows too short to carry a LAG column cannot be data rows.
    NF < 6 { next }
    # Column header, and the "Consumer group ... is rebalancing." notice.
    $1 ~ /^GROUP/ || $1 ~ /^Consumer/ { next }
    # The internal offsets topic is not monitored.
    $2 ~ /^__consumer_offsets/ { next }
    {
      key = $1 "-" $2
      if (!(key in total)) {
        order[++count] = key
        total[key] = 0
      }
      # Only add values that really are integers, so a placeholder in the
      # LAG column cannot corrupt the sum.
      if ($6 ~ /^-?[0-9]+$/) {
        total[key] += $6
      }
    }
    END {
      for (i = 1; i <= count; i++) {
        key = order[i]
        # Numeric comparison: the alarm fires only when lag > threshold.
        printf "kafkaOffsetLag-%s:%s\n", key, (total[key] > threshold + 0 ? "LARGE" : "SMALL")
      }
    }
  '
}

#######################################
# List every consumer group and print the lag status of each of its topics.
# Globals:   KAFKA_CONSUMER_GROUPS_CMD, KAFKA_BOOTSTRAP_SERVER (read)
# Outputs:   alarm lines on stdout, diagnostics on stderr
# Returns:   1 if the group list cannot be obtained, 0 otherwise
#######################################
main() {
  local group_list group group_info

  # stdin is redirected from /dev/null so the Kafka tool can never consume
  # input that belongs to this script.
  if ! group_list=$("$KAFKA_CONSUMER_GROUPS_CMD" \
      --bootstrap-server "$KAFKA_BOOTSTRAP_SERVER" --list < /dev/null); then
    printf 'kafka lag monitor: failed to list consumer groups\n' >&2
    return 1
  fi

  # Read line by line instead of changing the global IFS; this also keeps
  # group names from being glob-expanded.
  while IFS= read -r group; do
    [[ -n "$group" ]] || continue
    # A failing describe is reported, but whatever it printed is still parsed.
    group_info=$("$KAFKA_CONSUMER_GROUPS_CMD" \
      --bootstrap-server "$KAFKA_BOOTSTRAP_SERVER" \
      --group "$group" --describe < /dev/null) \
      || printf 'kafka lag monitor: describe failed for group %s\n' "$group" >&2
    printf '%s\n' "$group_info" | report_group_lag
  done <<< "$group_list"
}

main "$@"

说明

1、if [[ $group_name == GROUP* || $group_name == Consumer* ]] 判断是为了过滤命令的header以及Kafka rebalancing时输出的提示信息:

Consumer group `crawled_html_data_HDFS_pusher_group` is rebalancing.

2、正常来说应该输出具体的lag数值,而不是简单的LARGE或者SMALL;但是因为NOAH的监控表达式不支持数值比较,只支持正则匹配,所以只能在脚本里先完成阈值判断,再用下面的表达式报警:

regex_equal('kafkaOffsetLag.*', 'LARGE') 

补充:Kafka的监控命令

[work@kg-offline-01 kafka]$ bin/kafka-run-class.sh kafka.tools.ConsumerOffsetChecker --zookeeper localhost:2181 --group db_updator_group3  --topic kg_url_expand
[2017-02-28 15:38:45,221] WARN WARNING: ConsumerOffsetChecker is deprecated and will be dropped in releases following 0.9.0. Use ConsumerGroupCommand instead. (kafka.tools.ConsumerOffsetChecker$)
Group           Topic                          Pid Offset          logSize         Lag             Owner
db_updator_group3 kg_url_expand                  0   36951029        36951029        0               none
db_updator_group3 kg_url_expand                  1   36935997        36935997        0               none
db_updator_group3 kg_url_expand                  2   36949551        36949551        0               none
db_updator_group3 kg_url_expand                  3   36945975        36945975        0               none
db_updator_group3 kg_url_expand                  4   36950408        36950408        0               none
db_updator_group3 kg_url_expand                  5   36950323        36950323        0               none
db_updator_group3 kg_url_expand                  6   36937171        36937171        0               none
db_updator_group3 kg_url_expand                  7   36943758        36943758        0               none
db_updator_group3 kg_url_expand                  8   36946116        36946116        0               none
db_updator_group3 kg_url_expand                  9   36942941        36942941        0               none
db_updator_group3 kg_url_expand                  10  36948368        36948368        0               none
db_updator_group3 kg_url_expand                  11  36943450        36943450        0               none
db_updator_group3 kg_url_expand                  12  36950434        36950434        0               none
db_updator_group3 kg_url_expand                  13  36930054        36930054        0               none
db_updator_group3 kg_url_expand                  14  36930610        36930610        0               none

[work@kg-offline-01 kafka]$ bin/kafka-consumer-groups.sh --bootstrap-server localhost:8092 --list
rb_test_group
page_entity_consumer_group
pyspider_consumer_group
page_entity_HDFS_pusher_group
entity_consumer_group_2
original_page_entity_HDFS_pusher_group
KafkaManagerOffsetCache
crawled_html_data_HDFS_pusher_group
db_updator_group3
rb_test_group2
publish_event_gi_consumer_group
qa_kg_url_expand_group
rb_test_group1
original_page_entity_HDFS_pusher_group1
hdfs_publish_event_es_consumer_group2

[work@kg-offline-01 kafka]$ bin/kafka-consumer-groups.sh --bootstrap-server localhost:8092  --group db_updator_group3 --describe
GROUP                          TOPIC                          PARTITION  CURRENT-OFFSET  LOG-END-OFFSET  LAG             OWNER
db_updator_group3              kg_url_expand                  12         36950434        36950434        0               kafka-python-1.3.1_/10.194.163.51
db_updator_group3              kg_url_expand                  7          36943758        36943758        0               kafka-python-1.3.1_/10.194.163.51
db_updator_group3              kg_url_expand                  6          36937171        36937171        0               kafka-python-1.3.1_/10.194.164.14
db_updator_group3              kg_url_expand                  9          36942941        36942941        0               kafka-python-1.3.1_/10.194.164.14
db_updator_group3              kg_url_expand                  4          36950408        36950408        0               kafka-python-1.3.1_/10.194.164.14
db_updator_group3              kg_url_expand                  3          36945975        36945975        0               kafka-python-1.3.1_/10.194.163.51
db_updator_group3              kg_url_expand                  0          36951029        36951029        0               kafka-python-1.3.1_/10.194.163.51
db_updator_group3              kg_url_expand                  10         36948368        36948368        0               kafka-python-1.3.1_/10.194.163.51
db_updator_group3              kg_url_expand                  5          36950323        36950323        0               kafka-python-1.3.1_/10.194.164.14
db_updator_group3              kg_url_expand                  13         36930054        36930054        0               kafka-python-1.3.1_/10.194.163.51
db_updator_group3              kg_url_expand                  2          36949551        36949551        0               kafka-python-1.3.1_/10.194.164.14
db_updator_group3              kg_url_expand                  8          36946116        36946116        0               kafka-python-1.3.1_/10.194.163.51
db_updator_group3              kg_url_expand                  14         36930610        36930610        0               kafka-python-1.3.1_/10.194.164.14
db_updator_group3              kg_url_expand                  1          36935997        36935997        0               kafka-python-1.3.1_/10.194.164.14
db_updator_group3              kg_url_expand                  11         36943450        36943450        0               kafka-python-1.3.1_/10.194.164.14