我是靠谱客的博主 阔达唇膏,最近开发中收集的这篇文章主要介绍shell实现监控以及告警,觉得挺不错的,现在分享给大家,希望可以做个参考。

概述

检查mysql状态,keepalived状态,java组件,redis状态和磁盘空间
比较适用的可能就df,redis和mysql的监控,但redis监控的点也比较少,java组件大概只适合我公司
整体思路就是通过查看组件的某些值,达到阈值就产生告警并生成告警文件;如果存在非空的告警文件,就向邮箱发送邮件。
我觉得优点在于简单容易实现,差不多只要用到mail和crontab就行
当时写的是第一版,还有好多地方可以优化

check_config.sh
#!/bin/bash
# check_config.sh - shared settings for the monitoring checks defined below.
# The check functions use [[ ]], so this file needs bash rather than plain sh.

# Timestamp printed in alarm messages. The format contains a space, so it
# must reach date(1) as ONE quoted argument.
DATE=$(date "+%Y-%m-%d %H:%M")

## mysql info
# Path of the mysql client; left empty when the client is not installed.
comm=$(command -v mysql)
mysql_host="localhost"
mysql_user="root"
# NOTE(review): hard-coded credential; consider reading it from a protected
# file or the environment instead.
mysql_passwd="passwd"
mysql_db="mysqldb"
mysql_table="tbl_node_health_info"
key="nodeName,nodeIp,nodePort,status"
# Field names looked up in the saved "show slave status" output.
# Each variable holds the field its name refers to, so the SQL-thread alarm
# reports the SQL thread and the IO-thread alarm reports the IO thread.
mysql_sql_status="Slave_SQL_Running"
mysql_io_status="Slave_IO_Running"

### keepalived
keepalived_conf="/etc/keepalived/keepalived.conf"
keepalived_init="/etc/init.d/keepalived.init"

### disk status
### disk usage alarm threshold (percent)
upper="80"
Ip="127.0.0.1"

### redis memory
#redis_home="/home/redis-4.0.11/src"
#redis_host="192.168.0.155"
#redis_port="10170"
#redis_passwd="passwd"
#mem_alarm="0.8"

### log dir
#script_dir=`pwd`
script_dir="/opt/omtools/CHECK"
mkdir -p "${script_dir}/log"
kplog="${script_dir}/log/keepalived_alarm.log"
mysql_log="${script_dir}/log/mysql_data.log"
mysql_data_log="${script_dir}/log/mysql_alarm.log"
df_info_log="${script_dir}/log/df_info.log"
df_alarm_log="${script_dir}/log/df_alarm.log"
java_info_log="${script_dir}/log/java_info.log"
java_alarm_log="${script_dir}/log/java_alarm.log"
redis_info_log="${script_dir}/log/redis_info.log"
redis_alarm_log="${script_dir}/log/redis_alarm.log"

# Probe the MySQL server by running "show databases" with the configured
# client and credentials.
# Globals:  comm, mysql_host, mysql_user, mysql_passwd (read)
# Outputs:  whatever the client prints
# Returns:  the client's exit status; 127 when no client path is configured
check_mysql_status()
{
   if [[ -z "${comm}" ]]; then
      echo "check_mysql_status: mysql client not found" >&2
      return 127
   fi
   # Quoted so a password or host containing spaces/glob characters is
   # passed to the client as a single argument.
   # NOTE(review): -p<password> on the command line is visible in ps output.
   "${comm}" -h "${mysql_host}" -u"${mysql_user}" -p"${mysql_passwd}" -e"show databases"
}

# Print the configured columns of the health table, one row per line with
# the first four columns separated by single spaces (header row dropped).
# Globals:  comm, mysql_host, mysql_user, mysql_passwd, mysql_db,
#           mysql_table, key (read)
# Returns:  127 when no client path is configured; otherwise the status of
#           the final awk stage (pipefail is not enabled here).
get_mysql_table_info()
{
   if [[ -z "${comm}" ]]; then
      echo "get_mysql_table_info: mysql client not found" >&2
      return 127
   fi
   # Quoted so credentials with spaces/glob characters stay one argument.
   "${comm}" -h "${mysql_host}" -u"${mysql_user}" -p"${mysql_passwd}" \
      -e"use ${mysql_db};select ${key} from ${mysql_table};" \
      | awk 'NR == 1 {next} {print $1,$2,$3,$4}'
}

# Print the replication status of this server in vertical "Field: value"
# form, one field per line.
# Globals:  comm, mysql_host, mysql_user, mysql_passwd (read)
# Returns:  the client's exit status; 127 when no client path is configured
get_mysql_status_info()
{
   if [[ -z "${comm}" ]]; then
      echo "get_mysql_status_info: mysql client not found" >&2
      return 127
   fi
   # The statement must end with the client's \G terminator: a bare "G" is a
   # SQL syntax error, and \G is what produces the per-line output.
   # Single quotes keep the backslash literal.
   "${comm}" -h "${mysql_host}" -u"${mysql_user}" -p"${mysql_passwd}" -e 'show slave status\G'
}

# Print an alarm paragraph for every component in ${java_info_log} whose
# status column is not "OK".
# Each line of the file is expected to hold: name ip port status
# Globals:  java_info_log, DATE (read)
# Outputs:  alarm text on stdout (nothing when every component is OK)
# Returns:  0 normally, 1 when the info file cannot be read
get_java_status()
{
   local name ip status

   if [[ ! -r "${java_info_log}" ]]; then
      echo "get_java_status: cannot read ${java_info_log}" >&2
      return 1
   fi

   # Read the file row by row so name/ip/status always come from the same
   # line. The port column and any extra columns are discarded.
   while read -r name ip _ status _ || [[ -n "${name}" ]]; do
      # Skip blank lines.
      [[ -z "${name}" ]] && continue
      [[ "${status}" == "OK" ]] && continue
      echo "${DATE}"
      echo "业务组件告警"
      echo "服务器地址:${ip}"
      echo "告警组件:${name}"
      echo "组件状态:${status}"
      echo ""
   done < "${java_info_log}"
   return 0
}

# Inspect the saved replication status in ${mysql_log} and print an alarm
# paragraph for each replication thread whose value is not "Yes".
# Globals:  mysql_log, mysql_sql_status, mysql_io_status, DATE (read)
#           sql_status, io_status (written)
check_mysql_status_info()
{
  # Second column of the line whose field name matches as a whole word.
  sql_status=$(grep -w "${mysql_sql_status}" "${mysql_log}" | awk '{print $2}')
  io_status=$(grep -w "${mysql_io_status}" "${mysql_log}" | awk '{print $2}')

  if [[ "${sql_status}" != "Yes" ]]; then
    printf '%s\n' "${DATE}" "数据库SQL同步状态告警" "同步状态为:${sql_status}"
  fi

  if [[ "${io_status}" != "Yes" ]]; then
    printf '%s\n' "${DATE}" "数据库IO同步状态告警" "同步状态为:${io_status}"
  fi
}

# Check that keepalived is running and that the virtual IP from its config
# is bound on this host; print one alarm line per failed check.
# Globals:  keepalived_init, keepalived_conf, Ip (read)
# Outputs:  alarm lines on stdout (nothing when both checks pass)
check_keepalived()
{
   local kp_count vir_ip vip_count

   if [[ -x "${keepalived_init}" ]]; then
      # Exactly 3 "keepalived -D" processes are expected.
      # NOTE(review): presumably parent + VRRP + healthcheck children; the
      # count depends on the keepalived configuration - confirm.
      kp_count=$(ps -elf | grep -v grep | grep -ciw "keepalived -D")
      if [[ ${kp_count} -ne 3 ]]; then
         echo "keepalived未正常运行"
      fi
   else
      echo "not found keepalived file ${keepalived_init}"
   fi

   # A config file only needs to be readable; it normally carries no execute
   # bit, so testing -x would always take the "not found" branch.
   if [[ -r "${keepalived_conf}" ]]; then
      vir_ip=$(grep "virtual_server" "${keepalived_conf}" | awk '{print $2}')
      vip_count=0
      if [[ -n "${vir_ip}" ]]; then
         # -F: dots are literal; -w: 10.0.0.5 must not match 10.0.0.50.
         vip_count=$(ip a | grep -i net | grep -cFw -- "${vir_ip}")
      fi
      # An empty VIP (no virtual_server line) is reported as not mounted.
      if [[ ${vip_count} -ne 1 ]]; then
         echo "keepalived虚拟IP未挂载在${Ip}"
      fi
   else
      echo "not found keepalived file ${keepalived_conf}"
   fi
}

# Snapshot disk usage into ${df_info_log} and print an alarm paragraph for
# every filesystem whose usage percentage is at or above ${upper}.
# Globals:  df_info_log, upper, DATE, Ip (read)
# Outputs:  alarm text on stdout (nothing when all filesystems are below
#           the threshold)
check_df()
{
     # Save the POSIX-format, 1K-block listing, leaving out CD-ROM entries.
     df -kP | grep -v cdrom | grep -v "CD" > "${df_info_log}"

     # One awk pass keeps every value on the same row as its percentage and
     # does the numeric comparison itself, so bc is not needed.
     awk -v limit="${upper}" -v now="${DATE}" -v host="${Ip}" '
         NR == 1 { next }                          # header line
         {
             pct = $5
             sub(/%$/, "", pct)
             # Ignore rows whose capacity column is not a number (e.g. "-").
             if (pct !~ /^[0-9]+(\.[0-9]+)?$/) next
             if (pct + 0 < limit + 0) next
             # The mount point is everything from column 6 on, so paths
             # containing spaces are kept whole.
             mount = $6
             for (i = 7; i <= NF; i++) mount = mount " " $i
             print now
             print "服务器告警"
             print "服务器地址:" host
             print "告警目录:" mount
             print "容量使用率:" pct "%"
             print "告警目录总容量:" $2 " KB"
             print "告警目录已用容量:" $3 " KB"
             print ""
         }
     ' "${df_info_log}"
}

# Dump "info memory" from the configured redis instance into
# ${redis_info_log}.
# Globals:  redis_home, redis_host, redis_port, redis_passwd,
#           redis_info_log (read)
# Returns:  0 when redis-cli ran successfully; 1 when redis_home is unset or
#           not a directory; otherwise redis-cli's own non-zero status.
#           Callers can therefore skip the memory evaluation when no fresh
#           data was collected.
check_redis()
{
    if [[ -z "${redis_home:-}" || ! -d "${redis_home}" ]]; then
       return 1
    fi
    # NOTE(review): -a <password> on the command line is visible in ps output.
    "${redis_home}/redis-cli" -h "${redis_host}" -p "${redis_port}" -a "${redis_passwd}" info memory > "${redis_info_log}"
}

# Print the value of one "name:value" line from a redis INFO dump, with the
# trailing carriage return removed.
# Arguments: $1 - exact field name, $2 - path of the dump file
_redis_info_field()
{
    awk -F ':' -v key="$1" '$1 == key { sub(/\r$/, "", $2); print $2; exit }' "$2"
}

# Evaluate the memory section saved in ${redis_info_log} and print an alarm
# when used_memory / maxmemory reaches the ${mem_alarm} ratio.
# Globals:  redis_info_log (read), mem_alarm (read; 0.8 is used when unset)
# Outputs:  alarm text on stdout; nothing when below the threshold or when
#           maxmemory is 0
# Returns:  0 normally, 1 when the dump is unreadable or lacks numeric values
redis_info()
{
    local maxmemory used_memory used_memory_peak
    local limit=${mem_alarm:-0.8}

    if [[ ! -r "${redis_info_log}" ]]; then
       echo "redis_info: cannot read ${redis_info_log}" >&2
       return 1
    fi

    # INFO lines end in CRLF; the helper strips the CR so the values are
    # clean numbers.
    maxmemory=$(_redis_info_field maxmemory "${redis_info_log}")
    used_memory=$(_redis_info_field used_memory "${redis_info_log}")
    used_memory_peak=$(_redis_info_field used_memory_peak "${redis_info_log}")

    if [[ ! "${maxmemory}" =~ ^[0-9]+$ || ! "${used_memory}" =~ ^[0-9]+$ ]]; then
       echo "redis_info: no numeric maxmemory/used_memory in ${redis_info_log}" >&2
       return 1
    fi

    # With maxmemory 0 there is no ratio to compute, so no alarm is raised.
    if awk -v used="${used_memory}" -v max="${maxmemory}" -v limit="${limit}" \
         'BEGIN { exit !(max > 0 && used / max >= limit) }'; then
       echo "redis 内存告警"
       # Redis INFO reports these values in bytes.
       echo "redis设置的最大内存为: ${maxmemory}B"
       echo "redis当前使用内存为: ${used_memory}B"
       echo "redis最大达到使用内存为: ${used_memory_peak}B"
    fi
    return 0
}

邮件告警 checkAlarm.sh

#!/bin/bash

# Mail the contents of one alarm file to every configured recipient.
# Globals:   rec_user (read; whitespace-separated addresses or an array),
#            send_user (read; passed to mail -c), email_titel (read; subject)
# Arguments: $1 - path of the alarm file used as the mail body
# Returns:   0 when every mail command succeeded, 1 when the file is
#            unreadable or any mail command failed
SendMail()
{
     local mail_info=$1
     local IFS=$' \t\n'
     local -a recipients=()
     local User rc=0

     if [[ ! -r "${mail_info}" ]]; then
        echo "SendMail: cannot read alarm file '${mail_info}'" >&2
        return 1
     fi

     # Split the recipient list into an array once, without glob expansion.
     # read returns non-zero at end of input, which is expected here.
     read -r -d '' -a recipients <<< "${rec_user[*]}" || true

     for User in "${recipients[@]}"
     do
        # Quoted so a path or subject containing spaces stays one word.
        mail -s "${email_titel}" -c "${send_user}" "${User}" < "${mail_info}" || rc=1
     done
     return "${rc}"
}

#### /etc/mail.rc
#set from="xxxx@163.com" smtp="smtp.163.com"
#set smtp-auth-user="xxxx@163.com" smtp-auth-password="passwd"
#set smtp-auth=login

###  /etc/crontab
# Run once an hour as root; replace the placeholder with the real location
# of this script.
#0 */1 * * * root /bin/bash /path/to/checkAlarm.sh

# The format contains a space, so it must be passed to date(1) as one
# quoted argument.
DATE=$(date "+%Y-%m-%d %H:%M")

### addressee
send_user="xxxx@163.com"
#rec_user="xxxx@qq.com"
rec_user="xxxx@163.com xxxxxx@163.com"


# Locate the log directory relative to this script rather than the current
# working directory, so the alarm files are found when cron starts the
# script from another directory.
# NOTE(review): this assumes the script lives next to the log/ directory
# that the check script writes to - confirm.
script_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
Java_alarmfile="${script_dir}/log/java_alarm.log"
Df_alarmfile="${script_dir}/log/df_alarm.log"
KP_alarmfile="${script_dir}/log/keepalived_alarm.log"
MYSQL_alarmfile="${script_dir}/log/mysql_alarm.log"
Redis_alarmfile="${script_dir}/log/redis_alarm.log"
email_titel="ACS Server Alarm"


# Mail every alarm file that exists and is non-empty (-s).
for alarm_file in \
   "${Java_alarmfile}" \
   "${Df_alarmfile}" \
   "${KP_alarmfile}" \
   "${MYSQL_alarmfile}" \
   "${Redis_alarmfile}"
do
   if [[ -s "${alarm_file}" ]]; then
      SendMail "${alarm_file}"
   fi
done

使用系统自带的mail服务发信时,默认走25端口;云服务器通常默认封禁25端口的出站流量,所以可能需要向云厂商申请开通。

最后

以上就是阔达唇膏为你收集整理的shell实现监控以及告警的全部内容,希望文章能够帮你解决shell实现监控以及告警所遇到的程序开发问题。

如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。

本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
点赞(60)

评论列表共有 0 条评论

立即
投稿
返回
顶部