歡迎您光臨本站 註冊首頁

AIX系統主機日常檢查腳本

←手機掃碼閱讀     火星人 @ 2014-03-12 , reply:0
  

#!/bin/sh

#
# Load the configuration file first; if it does not exist (or is not
# readable), report an error and exit.
#
SOURCE=$HOME/config/config

# check_source: source the settings file named by $SOURCE into the current shell.
# Globals:  SOURCE (read)
# Outputs:  an error message on stderr when $SOURCE is not readable
# Returns:  on failure it does not return; the script exits with status 1
check_source()
{
# Quoted so that a path containing blanks is still treated as one word.
if [ -r "$SOURCE" ]; then
    . "$SOURCE"
else
    echo "$(basename "$0"): Cannot locate the default setting file." >&2
    exit 1
fi
}

#
# Define the report header.
#
# report_header: print the banner that opens the check report.
# Globals:  HOSTIP, HOSTNAME, USER (written)
# Outputs:  host name, address, invoking user, current time and the report
#           title on stdout
report_header()
{
# Second field of the second line of `ifconfig -a`.
# NOTE(review): presumably the "inet <addr>" line of the first interface on AIX - confirm.
HOSTIP=$(ifconfig -a | sed -n '2p' | awk '{print $2}')
HOSTNAME=$(hostname)
# awk splits on any run of blanks; cut -d only accepts a single-character
# delimiter, so a multi-blank delimiter makes cut fail and leaves USER empty.
USER=$(who am i | awk '{print $1}')
cat <<!
Hostname: $HOSTNAME       Server: $HOSTIP
User: $USER        Time: $(date '+%Y-%m-%d %H:%M:%S')

                                 SYSTEM CHECK REPORT
                                 ===================
 
!
}

#
# Define the directory and name of the log file; the current user's home
# directory is used as LOG_PATH.
#
LOG_PATH=$HOME
LOG_FILE=$LOG_PATH/log$(date +%Y%m%d%H%M%S)

#
# Archive historical log files into $LOG_PATH/niyl.
#
# OLD_LOG_PREFIX selects which earlier files are archived; the default keeps
# the original "log2007" selection.
OLD_LOG_PREFIX=${OLD_LOG_PREFIX:-log2007}
cd "$LOG_PATH" || { echo "$(basename "$0"): cannot cd to $LOG_PATH" >&2; exit 1; }
for old_log in "$LOG_PATH/$OLD_LOG_PREFIX"*; do
    # An unmatched glob stays literal, so every candidate is checked; looping
    # also copes with several matches, which a single `test -f glob` cannot.
    [ -f "$old_log" ] || continue
    mkdir -p "$LOG_PATH/niyl" && mv "$old_log" "$LOG_PATH/niyl/" >/dev/null 2>&1
done

# Define the temp directory; create it first if it does not exist.
TEMP_PATH=$LOG_PATH/temp
if [ ! -d "$TEMP_PATH" ]; then
    mkdir "$TEMP_PATH"
fi

#
# Load the environment settings (check_source exits the script with
# status 1 when the settings file cannot be read).
#
check_source

#
# Append the report header to the log file.
#
# The redirect target is quoted so a log path containing blanks stays one word.
report_header >>"$LOG_FILE"


# Check CPU usage, sampled with vmstat (10 samples, 1 second apart).
echo "***************************************** Check CPU *****************************************">>"$LOG_FILE"
# Every vmstat line is passed through; lines whose first field is numeric are
# treated as samples and field 16 is summed.
# NOTE(review): assumes field 16 is the CPU idle column of AIX vmstat - confirm.
vmstat 1 10 | awk '
    { print }
    $1 ~ /^[0-9]/ { idle_total += $16; samples++ }
    END {
        # Busy percentage = 100 - mean idle, truncated to an integer so that
        # the shell integer test below can compare it. With no samples the
        # result is 100, which makes the threshold check raise the warning.
        busy = (samples > 0) ? int(100 - idle_total / samples) : 100
        print "The average usage of cpu is :" busy
    }' >"$TEMP_PATH/cpu_info"

cat "$TEMP_PATH/cpu_info" >>"$LOG_FILE"

cpu_used_pct=$(awk -F ':' '/The average usage of cpu is/ {print $2}' "$TEMP_PATH/cpu_info")
if [ "$cpu_used_pct" -gt "$CPU_VALUE" ]; then
    echo "LOG-Warnning:$(date '+%Y-%m-%d %H:%M:%S'), CPU負載超過閥值設置,請檢查系統!!">>"$LOG_FILE"
else
    # printf expands \t in every shell; echo only does so in some.
    printf '\t\t\t\t CPU負載正常!!\n' >>"$LOG_FILE"
fi

#
# Memory usage monitoring, including paging (swap) activity.
#
echo >>"$LOG_FILE"
echo >>"$LOG_FILE"
echo "***************************************** check memory useage *****************************************">>"$LOG_FILE"
# Re-reads the vmstat capture stored in cpu_info: every line is echoed, and
# fields 6 and 7 of the numeric sample lines are summed.
# NOTE(review): assumes fields 6/7 are the pi/po columns of AIX vmstat - confirm.
awk '
    { print }
    $1 ~ /^[0-9]/ { totalpi += $6; totalpo += $7 }
    END {
        # Exactly one verdict is always printed; a total of exactly 10 counts
        # as abnormal rather than printing nothing.
        if (totalpi < 10 && totalpo < 10)
            print "\t\t\t\tMemory負載正常!!"
        else
            print "Memory負載異常,請檢查系統!!"
    }' "$TEMP_PATH/cpu_info" >>"$LOG_FILE"

#
# Check disk space.
#
echo >>"$LOG_FILE"
echo >>"$LOG_FILE"
echo "***************************************** check disk space *****************************************">>"$LOG_FILE"
df -k >>"$LOG_FILE"
# Store the first seven df fields comma-separated, without the header line
# and without any line mentioning proc.
df -k | grep -v proc | grep -v Filesystem \
    | awk '{print $1","$2","$3","$4","$5","$6","$7}' >"$TEMP_PATH/disk_info"

# Field 4 (with its trailing % removed) is compared with DISK_VALUE and
# field 7 is used as the name in the message.
# NOTE(review): assumes the AIX `df -k` layout (%Used in field 4, mount point in field 7) - confirm.
grep -v '^#' "$TEMP_PATH/disk_info" \
    | while IFS=',' read -r fs_dev fs_blocks fs_free fs_used fs_iused fs_ipct fs_mount
do
    used_pct=${fs_used%\%}
    # Skip rows whose usage is not a plain number (e.g. "-"); the integer
    # test would otherwise fail and wrongly report the filesystem as normal.
    case "$used_pct" in
        ''|*[!0-9]*) continue ;;
    esac
    if [ "$used_pct" -gt "$DISK_VALUE" ]; then
        printf 'LOG-Warnning: %s, 磁碟%s\t剩餘空間不足,請處理!!\n' \
            "$(date '+%Y-%m-%d %H:%M:%S')" "$fs_mount" >>"$LOG_FILE"
    else
        printf '\t\t\t\t 磁碟空間%s\t\t使用正常!!\n' "$fs_mount" >>"$LOG_FILE"
    fi
done

#
# Monitor disk I/O with iostat (3 reports, 1 second apart).
#
{
    echo
    echo
    echo "***************************************** check iostat *****************************************"
    iostat 1 3
} >>$LOG_FILE

#
# Monitor network traffic. A host list could be added here so that each
# host is pinged to verify connectivity.
#
{
    echo
    echo
    echo "***************************************** check netstat *****************************************"
    netstat -i
} >>$LOG_FILE

#
# Check the host's error log.
#
echo >>"$LOG_FILE"
echo >>"$LOG_FILE"
echo "***************************************** check system err *****************************************">>"$LOG_FILE"
errpt | head -10 >>"$LOG_FILE"
# errpt's TIMESTAMP column (field 2) is MMDDhhmmYY, so today's entries start
# with month+day and end with the two-digit year. Matching only the MMDD
# prefix would also flag entries from the same date in earlier years.
day=$(date +%m%d)
year2=$(date +%y)
# Output is discarded; only grep's exit status is needed.
if errpt | awk '{print $2}' | grep "^${day}....${year2}\$" >/dev/null; then
    echo "LOG-Warnning: $(date '+%Y-%m-%d %H:%M:%S'),The system has found a error today.Please check the error report." >>"$LOG_FILE"
else
    echo >>"$LOG_FILE"
    printf '\t\t\t\t There is no system error report today.System is OK!!\n' >>"$LOG_FILE"
fi

#
# Check whether HA (HACMP) is running normally.
#
echo >>"$LOG_FILE"
echo >>"$LOG_FILE"
echo "***************************************** check HACMP status *****************************************">>"$LOG_FILE"
/usr/es/sbin/cluster/clstat -o >"$TEMP_PATH/ha_info"
lssrc -g cluster >>"$TEMP_PATH/ha_info"
cat "$TEMP_PATH/ha_info" >>"$LOG_FILE"
echo >>"$LOG_FILE"
# From each "Node:" line keep two tokens: the first word after "Node:" and
# the word after the next colon.
# NOTE(review): assumes clstat prints "Node: <name> State: <state>" - confirm.
grep "Node:" "$TEMP_PATH/ha_info" \
    | awk -F ':' '{print $2,$3}' | awk '{print $1,$3}' \
    | while read -r node_name node_state
do
    node="${node_name}'s"
    case "$node_state" in
        *UP)
            printf '\t\t\t\t The node %s is OK!!\n' "$node" >>"$LOG_FILE"
            ;;
        *)
            # The redirect must sit on the same line as the command; on a line
            # of its own it only touches the file and the warning is lost.
            echo "$(date '+%Y-%m-%d %H:%M:%S'),LOG-Warnning: The node $node status is DOWN ,it was terminated ." >>"$LOG_FILE"
            ;;
    esac
done

 


#
# Check that the six main database background processes are running.
#
echo >>"$LOG_FILE"
echo >>"$LOG_FILE"
echo "***************************************** check oracle process *****************************************">>"$LOG_FILE"
# Extract the process name from each matching ps line: second blank-separated
# word of the text after the first "-".
# NOTE(review): assumes the ps TTY column is "-" for these processes - confirm.
ps -ef | grep ora_ | grep -v grep | awk -F '-' '{print $2}' | awk '{print $2}' >"$TEMP_PATH/ora_process_info"
ps -ef | grep ora_ | grep -v grep >>"$LOG_FILE"

# COUNT ends up as the number of expected processes that were found. It
# starts at 0 so the final comparison always has a numeric operand.
COUNT=0
for ora_proc in ckpt dbw0 reco lgwr pmon smon; do
    ora_name="ora_${ora_proc}_ora92"
    # grep's exit status is used directly; testing its output as a word
    # breaks when more than one line matches.
    if grep "$ora_name" "$TEMP_PATH/ora_process_info" >/dev/null 2>&1; then
        COUNT=$((COUNT+1))
    else
        # The ckpt message has no blank before "!" while the others do; both
        # forms are kept byte-identical for anything that parses the log.
        case "$ora_proc" in
            ckpt) msg_end='!' ;;
            *)    msg_end=' !' ;;
        esac
        echo "LOG-Warnning: $(date '+%Y-%m-%d %H:%M:%S'),The Process $ora_name was terminated$msg_end"  >>"$LOG_FILE"
    fi
done

if [ "$COUNT" -eq 6 ]; then
    echo >>"$LOG_FILE"
    printf '\t\t\t\tThe main six Oracle processes is OK !!\n' >>"$LOG_FILE"
fi


#
# Check the oracle tablespace.
#
echo >>"$LOG_FILE"
echo >>"$LOG_FILE"
echo "***************************************** check oracle tablespace *****************************************">>"$LOG_FILE"
#su - oracle -c sqlplus dxh/dxh < /home/guest/dxhwh/niyl/tablespace_query.sql >>$LOG_FILE
# NOTE(review): the credentials are passed on the command line and are
# visible in `ps` output - consider an alternative login method.
sqlplus -s xxx/xxx <<!EOF > "$TEMP_PATH/ts_info"
set pagesize 100
set linesize 100
col status for a10
col tablespace_name for a20
col contents for a10
col "size(M)" for a15
col used for a15
col pct for a10
select  d.status, d.tablespace_name,
        TO_CHAR(NVL(a.bytes / 1024 /1024, 0),'99G999G990') "size(M)",
        TO_CHAR(NVL(a.bytes - NVL(f.bytes, 0),0)/1024/1024, '99G999G990D00')  used,
        TO_CHAR(NVL((a.bytes - NVL(f.bytes, 0)) / a.bytes * 100, 0), '990D00')||'%' pct
FROM sys.dba_tablespaces d,
        (select tablespace_name, sum(bytes) bytes from dba_data_files group by tablespace_name) a,
        (select tablespace_name, sum(bytes) bytes from dba_free_space group by tablespace_name) f
WHERE d.tablespace_name = a.tablespace_name(+)
AND d.tablespace_name = f.tablespace_name(+)
order by tablespace_name ;
exit
!EOF

cat "$TEMP_PATH/ts_info" >>"$LOG_FILE"
# For each ONLINE row: name, size, used and pct (fields 2-5 of the report).
grep ONLINE "$TEMP_PATH/ts_info" | awk '{print $2":"$3":"$4":"$5}' \
    | while IFS=':' read -r ts_name ts_total ts_used ts_pct
do
    # Keep only the integer part of the percentage ("85.23%" -> "85").
    ts_used_pct=${ts_pct%%[.%]*}
    # Skip rows whose percentage is not a plain number; the integer test
    # would otherwise fail and wrongly report the tablespace as OK.
    case "$ts_used_pct" in
        ''|*[!0-9]*) continue ;;
    esac
    if [ "$ts_used_pct" -ge "$TS_VALUE" ]; then
        printf 'LOG-Warnning: %s,表空間%s 的剩餘空間緊張,請儘快清理表空間!\n' \
            "$(date '+%Y-%m-%d %H:%M:%S')" "$ts_name" >>"$LOG_FILE"
    else
        printf '\t\t\t\t The tablespace of %s\t is OK!!\n' "$ts_name" >>"$LOG_FILE"
    fi
done

#
# Check the oracle Job.
#
echo >>"$LOG_FILE"
echo >>"$LOG_FILE"
echo "***************************************** check oracle job *****************************************">>"$LOG_FILE"
# Human-readable job listing, appended straight to the report.
sqlplus -s xxx/xxx <<!!ET >> "$LOG_FILE"
col job for 999
col last_date for a20
col next_date for a20
col what for a40
set linesize 120

select job,what,
       to_char(last_date,'yyyy-mm-dd hh24:mi:ss') last_date,
       to_char(next_date,'yyyy-mm-dd hh24:mi:ss') next_date,
       failures
from dba_jobs
order by job;
!!ET

# Machine-readable listing: each data row is tagged with the literal XXX so
# it can be picked out of the sqlplus output below.
sqlplus -s xxx/xxx <<!EOF > "$TEMP_PATH/job_info"
col flag for a5
col rou for 99999
select 'XXX' flag,job,failures,broken,round(next_date-sysdate,2)*100 rou from dba_jobs order by job;
!EOF

grep XXX "$TEMP_PATH/job_info" | awk '{print $2,$3,$4,$5}' \
    | while read -r jobnum failure broken round
do
    # A job is OK when it has no failures, is not flagged broken, and its
    # "rou" value (round(next_date - sysdate, 2) * 100) is at most 100 for
    # jobs 3 and 4, or exactly 0 for every other job.
    # `=` needs blanks around it: "$broken"="N" is one non-empty word and is
    # always true, so the broken flag would never be checked.
    # Non-numeric fields make the integer tests fail, which counts as not OK.
    job_ok=0
    if [ "$failure" -eq 0 ] 2>/dev/null && [ "$broken" = "N" ]; then
        case "$jobnum" in
            3|4) [ "$round" -le 100 ] 2>/dev/null && job_ok=1 ;;
            *)   [ "$round" -eq 0 ] 2>/dev/null && job_ok=1 ;;
        esac
    fi

    if [ "$job_ok" -eq 1 ]; then
        printf '\t\t\t\tThe Job %s is OK!!\n' "$jobnum" >>"$LOG_FILE"
    else
        echo "LOG-Warnning: $(date '+%Y-%m-%d %H:%M:%S'),The Job $jobnum was terminated !" >>"$LOG_FILE"
    fi
done

 

#
# Check the oracle alert log: the last 300 lines, minus lines containing
# "Thread", "Current", today's "<weekday> <month>" stamp, or an :NN: time field.
#
{
    echo
    echo
    echo "***************************************** check oracle alert log *****************************************"
} >>$LOG_FILE
tail -300 $ORACLE_BASE/admin/ora92/bdump/alert_ora92.log \
    | grep -v -e Thread -e Current -e "`date +'%a %h'`" -e ":[0-9][0-9]:" >>$LOG_FILE

 

  



[火星人 ] AIX系統主機日常檢查腳本已經有602次圍觀

http://coctec.com/docs/unix/show-post-73902.html