Summary: A daily script to check all sorts of stuff on AIX.
Date: 27 December 2010
Refactor: 21 February 2025: Checked links and formatting.
#!/bin/bash
########################################################################################################################
# Author : Sjoerd Hooft
# Date Initial Version: 27 Dec 2010
# Comments: sjoerd_@_warmetal_nl
#
# Description:
# This is a sample script to perform the daily checks on AIX servers.
#
# Recommendations:
# The script is designed for a 120 column terminal.
# The running user must be able to do a passwordless sudo to root.
#
# Changes:
# Please comment on your changes to the script (your name and email address, line number, description):
########################################################################################################################

# Script Variables
# These stay plain (non-readonly) globals: the automatic mode reassigns AUTOMATIC, BOLD and BOLDOFF later on.
HOSTNAME_SHORT=$(hostname -s)
AUTOMATIC=0
BASEDIR=$(dirname "$0")
LOGFILE="$BASEDIR/dc.log"
WHATAMI=$(basename "$0")
DATE=$(date +%Y%m%d)
TOMAIL=sjoerd_@_warmetal_nl
BOLD=$(tput bold)
BOLDOFF=$(tput sgr0)

# Directories
APPDIR="/var/log/APP"
WASDIR="/opt/WAS_Profiles/AppSrv/logs"
FILE3DIR="/var/data/FILE3"
FILE1DIR="/var/data/FILE1/log"
JMSDIR="/var/data/app/jms_errors"
TOMCATDIR="/var/log/app"

# Oracle Variables
ORACLE_HOME="/opt/oracle/product/10.2"
ORACLE_BASE="/opt/oracle"
ORACLE_SID_DB1=db1
ORACLE_SID_DB2=db2
export ORACLE_HOME ORACLE_BASE

# Function to pause the script
# The operator can evaluate the outcome of the previous function
# Does nothing when AUTOMATIC is not "0" (unattended run).
scriptContinue () {
  if [[ "$AUTOMATIC" == "0" ]]; then
    echo "Press ENTER to continue"
    # The typed text is irrelevant; -r keeps backslashes from being interpreted.
    read -r _
    clear
  fi
}

# Function that will list the AIX internal errors
checkErrors () {
  echo "$BOLD Listing the Error Logging Facility: $BOLDOFF"
  errpt
  echo
}

# Function that will clear all AIX internal errors
clearErrors () {
  echo "Clearing Errors"
  sudo errclear 0
}

# Function that will let the operator view the AIX internal errors in detail
viewErrors () {
  echo "Viewing Errors"
  errpt -a | less
}

# Function that will remove all files from the protected directory that holds JMS/MQ errors
removeJms () {
  echo "Are you sure you want to remove the JMS error files from $JMSDIR? "
  echo "If you hesitate, press CTRL+C to exit the script. "
  scriptContinue
  echo "Removing these files: "
  # ${JMSDIR:?} aborts the script when JMSDIR is empty or unset,
  # so the glob below can never degrade to "/*".
  echo "${JMSDIR:?}"/*
  sudo rm -- "${JMSDIR:?}"/*
  echo
  echo "Done"
  echo
}

# Function that will check the last 4 logfiles from 4 different applications
# This is possible with multiple for loops since the files are named similar
# Known errors are being skipped
# It will show only the last 10 entries per logfile
checkLog-abs () {
  local application logfile
  echo "$BOLD Checking abs-logs in $APPDIR $BOLDOFF "
  echo "Note: we check the last 4 logfiles and skip any known error, and limit the amount of lines to 10."
  for application in appserver1 appserver2 appserver3; do
    for logfile in app.log.4 app.log.3 app.log.2 app.log; do
      echo "Checking $BOLD $application-$logfile $BOLDOFF "
      # The logs live in $APPDIR (the directory announced above); the
      # previously used $ABSDIR is not defined anywhere in this script.
      # NOTE(review): 'didn.d' in the second filter looks like a typo for
      # 'didn.t' - confirm against the real log line before changing it.
      grep ERROR "$APPDIR/$application-$logfile" \
        | grep -v 'LDAP: error code 32 - No Such Object' \
        | grep -v 'doRefreshProposalsResponse didn.d send the email caught - ignoring' \
        | grep -v 'Error getting active tan: No TAN available for user' \
        | grep -v 'CORBA OBJECT_NOT_EXIST' \
        | tail -10
      echo
    done
    # scriptContinue already clears the screen in interactive mode; no extra
    # clear here, so automatic runs do not write control codes to the log.
    scriptContinue
  done
  echo
}

# Function to check the SystemOut.log from the websphere applications
# Known errors are being skipped
# It will show only the last 10 entries per logfile
checkLog-was () {
  local server
  echo "$BOLD Checking websphere logs in $WASDIR $BOLDOFF "
  for server in server1 server2 server3 server4; do
    echo "Checking $BOLD ${server}_Server/SystemOut.log $BOLDOFF "
    grep -i error "$WASDIR/${server}_Server/SystemOut.log" \
      | grep -v 'oracle.jdbc.driver.DatabaseError.throwSqlException' \
      | grep -v 'The Network Adapter could not establish the connectionDSRA0010E' \
      | grep -v 'Error creating XA Connection and Resource com.ibm.ws.exception.WsException: DSRA8100E' \
      | grep -v 'Error creating XA Connection and Resource java.security.PrivilegedActionException:' \
      | tail -10
    echo
    scriptContinue
  done
  echo
}

# Function to check whether files have been processed.
# They will have a different extension.
checkFiles-host3 () {
  # Prints two listings of $FILE3DIR, oldest first, 10 entries each:
  # names ending in .txt, then names containing .txt anywhere.
  echo "$BOLD Checking the process on $HOSTNAME_SHORT $BOLDOFF "
  echo "There should be no files ending on .txt older than one hour:"
  echo "Last 10 files ending on .txt in $FILE3DIR:"
  ls -ltr "$FILE3DIR" | grep '\.txt$' | tail -10
  echo
  echo "$BOLD Checking the process on $HOSTNAME_SHORT $BOLDOFF "
  echo "There should be recent (last 24 hours) files:"
  echo "Last 10 files in in $FILE3DIR:"
  ls -ltr "$FILE3DIR" | grep '\.txt' | tail -10
  echo
  scriptContinue
}

# Function that will check whether error files exist
# It will allow the operator, after examining the size, to delete them
# Continue works only in this menu structure because this is the last check for this host
checkFiles-host1 () {
  local jms_lines
  echo "$BOLD Checking MQ process error files on $HOSTNAME_SHORT $BOLDOFF "
  echo "Checking for jms (MQ) errors in $JMSDIR, there should be no files in this directory:"
  ls -ltr "$JMSDIR"
  if [[ "$AUTOMATIC" == "0" ]]; then
    # "ls -l" prints a "total" line even for an empty directory, so more
    # than one output line means there is at least one file.
    jms_lines=$(ls -ltr "$JMSDIR" | wc -l)
    if [[ "$jms_lines" -gt 1 ]]; then
      echo
      echo "${BOLD}There are files in this directory!$BOLDOFF If all files are really small ( < 100 bytes ) you can delete them. "
      echo " Would you like to do that right now?"
      echo
      echo "remove - remove all files in $JMSDIR"
      echo "continue - continue with dailycheck"
      echo
      menuChoice
    fi
    scriptContinue
  else
    echo "AUTOMATIC mode is on. If there are any files run the script manually on $HOSTNAME_SHORT "
  fi
}

# Function to check Oracle logfile bdump for errors
# It will show the line with the error, as well as the 2 lines before and after
# It will show only the last 10 entries per logfile
# Reads ORACLE_SID, which hostSpecific exports before calling this function.
checkLog-ora () {
  local ORALOGDIR="/var/log/oracle/10.2/${ORACLE_SID}/bdump"
  echo "$BOLD Checking the Oracle logfile $ORALOGDIR/alert_$ORACLE_SID.log $BOLDOFF "
  echo "The last 10 ORA- messages are displayed, including the 2 lines before and the two lines after "
  # The alert log is read through sudo, hence cat instead of a redirect.
  # The sed program is kept token-for-token from the original, one command
  # group per line.
  # NOTE(review): tail -10 limits the output to 10 lines, not 10 messages.
  sudo cat "$ORALOGDIR/alert_$ORACLE_SID.log" | sed -e '
1{$!N;$d;}
$!N;/ORA-/!D
$!N;$d;N;p
g;$!N;$d;N;D
' | tail -10
  echo
  scriptContinue
}

# Function to check tomcat application servers for errors
# It will evaluate all logfiles created the last four days
# Known errors are being skipped
checkLog-tomcat () {
  echo "$BOLD Checking the tomcat application server logs on $HOSTNAME_SHORT $BOLDOFF "
  echo "$BOLD Checking Tomcat logfiles: $BOLDOFF"
  echo "Checking the last four days of $TOMCATDIR/applicaton.log files"
  # -print emits each file name before its content; the name only survives
  # the grep when it contains ERROR itself.
  find "$TOMCATDIR/app/." -type f -name 'application*' -mtime -3 -print -exec cat {} \; | grep ERROR
  echo
  echo "Checking the last four days of $TOMCATDIR/framework.log files"
  find "$TOMCATDIR/app/." -type f -name 'framework*' -mtime -3 -print -exec cat {} \; | grep ERROR
  echo
  scriptContinue
}

# Function to expand the options handling AIX system errors
actionErrors () {
  menuStart
  checkErrors
  echo "Note: The system clears all hardware errors automatically after 90 days, and all other errors after 30 days."
  echo
  echo "clearerrors - clear all errors now"
  echo "viewerrors - review errors in less"
}

# Function to specify which host the script runs on
# Declare host specific variables
# Set the actions to be taken
hostSpecific () {
  # Only clear an interactive screen; in automatic mode stdout is the
  # logfile and clear would write control codes into it.
  if [[ "$AUTOMATIC" == "0" ]]; then
    clear
  fi
  case "$HOSTNAME_SHORT" in
    host1)
      checkLog-abs
      checkLog-was
      checkFiles-host1
      ;;
    host2)
      export ORACLE_SID=$ORACLE_SID_DB2
      checkLog-ora
      ;;
    host3)
      export ORACLE_SID=$ORACLE_SID_DB1
      checkLog-ora
      checkLog-tomcat
      checkFiles-host3
      ;;
  esac
}

# Function to clear the screen and give the idea of a pretty script
menuStart () {
  clear
  echo "########################################################################################################################"
  echo "################################################### Daily Check Menu ###################################################"
  echo
}

# Function to show the operator the default menu options
menuEnd () {
  echo
  echo "errors - take further actions regarding errors"
  echo "host - start host specific checks"
  echo "auto - restarts the script and runs it automatically, after which the logfile is mailed to $TOMAIL "
  echo " - this also works from the commandline: $WHATAMI auto "
  echo
  echo "exit - exit"
  echo
}

# Function to read the menu option from the operator
# This menu is used for all required menus in the script
# Keeps prompting until a choice other than "errors" or an unknown word has
# been handled. An empty answer (also end-of-input) means "exit".
menuChoice () {
  # A loop instead of self-recursion, so repeated wrong input does not
  # stack up nested function calls.
  while :; do
    echo "Enter menu choice: [exit]"
    # Reset first: when read hits end-of-input the choice must be empty so
    # the default below applies and the script terminates.
    MENUCHOICE=
    read -r MENUCHOICE
    if [[ -z "$MENUCHOICE" ]]; then
      MENUCHOICE="exit"
    fi
    case "$MENUCHOICE" in
      errors )
        # Show the error sub menu, then ask again.
        actionErrors
        ;;
      host )
        hostSpecific
        return
        ;;
      clearerrors )
        clearErrors
        return
        ;;
      viewerrors )
        viewErrors
        return
        ;;
      auto )
        "$BASEDIR/$WHATAMI" auto
        exit
        ;;
      exit )
        exit
        ;;
      remove )
        removeJms
        return
        ;;
      continue )
        echo
        return
        ;;
      * )
        echo "Wrong Input"
        ;;
    esac
  done
}

# Function to mail the log when the script has run automatically
mailLog () {
  mail -s "Report $WHATAMI on $HOSTNAME_SHORT of $DATE" "$TOMAIL" < "$LOGFILE"
}

# Function to determine whether the script should run automatically
# Set the automatic variables to send the output to a logfile instead of a screen
# and make the logfile readable by removing bold text markers
# It also makes sure the logfile gets mailed
if [[ "${1:-}" == "auto" ]]; then
  AUTOMATIC=1
  BOLD=
  BOLDOFF=
  # From here on all output, including errors, goes to the logfile.
  exec > "$LOGFILE" 2>&1
  checkErrors
  hostSpecific
  mailLog
  exit
fi

# Actual script:
# Infinite while loop, as long the script is not exited,
# start the menu, check for errors and ask the operator what to do
while :; do
  menuStart
  checkErrors
  menuEnd
  menuChoice
done