# Monitor ntpdate/xntp daemon syslog messages
#
# SEC (Simple Event Correlator) ruleset. Rules are separated by blank
# lines and are tried in order for every input event.
#
# Hostnames are matched with [A-Za-z0-9._-]+ (the range A-z would also
# accept the punctuation characters between 'Z' and 'a').

## standard preamble

# Any event mentioning neither xntpd nor ntpdate is not ours: stop
# processing it in this ruleset.
type= single
continue= dontcont
ptype= nregexp
pattern= xntpd|ntpdate
desc= Eliminate events that aren't processed by this ruleset.
action= none

# If the EVENT_PROCESSED context already exists, stop processing the
# event here.
type= single
continue= dontcont
ptype= tvalue
pattern= TRUE
desc= See if other ruleset has processed this event
action= none
context= [EVENT_PROCESSED]

# Create EVENT_PROCESSED and pass the event on to the next rule. The
# last rule in this file deletes the context again for events that no
# rule below consumed.
type= single
continue= takenext
ptype= tvalue
pattern= TRUE
desc= Stop other rulesets from processing this event
action= create EVENT_PROCESSED

# Report larger than normal changes for my site (> 1/4 second) by the
# daemon. Indicates bad clock, extreme temperature changes etc.
#
# Example:
# Dec 23 07:34:29 blade61.cs.umb.edu xntpd[313]: \
# [ID 774427 daemon.notice] time reset (step) 0.770897 s
#
# $1 = host, $2 = signed step in seconds. The context is a Perl
# expression that is true only when the magnitude exceeds 0.25 s.
type= single
desc= report large xntpd corrections for host $1
continue= dontcont
ptype= regexp
context= =(abs($2) > 0.25)
pattern= ([A-Za-z0-9._-]+) xntpd\[[0-9]+\]:.*time reset \(step\) ([-]?[0-9.]+) s
action= pipe '$0' /usr/bin/mailx -s "xntpd: large time change on $1" admin

# Look for shutdown messages that are not followed by a startup
# within 1 minute.
# Example:
# Dec 23 02:27:53 blade61.example.org xntpd[457]: \
# [ID 866926 daemon.notice] xntpd exiting on signal 15
#
# Dec 23 02:28:28 blade61.cs.umb.edu xntpd[305]: \
# [ID 798731 daemon.notice] using kernel phase-lock loop 0041
#
# action runs if the 60 second window expires without the startup
# message being seen; action2 runs if the startup message arrives in time.
type= pairwithwindow
desc= verify restart of xntp daemon on $1
ptype= regexp
pattern= ([A-Za-z0-9._-]+) xntpd\[[0-9]+\]: .* xntpd exiting on signal 15
action= shellcmd /usr/bin/mailx -s "xntpd: server not restarted on $1" admin
ptype2= regexp
pattern2= $1 xntpd\[[0-9]+\]: .* using kernel phase-lock loop 0041
desc2= look for restart xntp on $1
action2= logonly
window= 60

# Ignore startup messages.
# Example:
# Dec 23 02:29:28 blade61.example.org xntpd[305]: \
# [ID 301315 daemon.notice] tickadj= 5, tick= 10000, \
# tvu_maxslew= 495, est. hz= 100
#
# The optional (?:...) group accepts the "[ID n facility.level]" message
# id shown in the example above; lines without it still match.
type= suppress
ptype= regexp
pattern= xntpd\[[0-9]*\]: (?:\[ID [0-9]+ [a-z0-9.]+\] )?(version|tickadj|precision) *=

# Look for "adjustment didn't complete" messages and see if there are
# 3 or more of them in a 10 minute period. If so report it because it
# means that time sync may no longer be within parameters.
# Example:
# Dec 23 02:29:28 blade61.example.org xntpd[305]: \
# Previous time adjustment didn't complete
#
# %s in the action expands to the desc string of this rule.
type= singlewiththreshold
continue= dontcont
desc= xntpd incomplete time adjustment on $1
ptype= regexp
pattern= ([A-Za-z0-9._-]+) xntpd\[[0-9]*\]: Previous time adjustment didn't complete
action= shellcmd /usr/bin/mailx -s "excessive %s" admin
thresh= 3
window= 600

# Lost sync. Only important if we don't get a new sync within the next
# 10 minutes.
#
# $1 = host, $2 = pid; both are reused in pattern2 so that only a
# "synchronized to" message from the same daemon instance cancels the
# report. The text given to "write" is emitted as-is, so it is not quoted.
type= pairwithwindow
continue= dontcont
ptype= regexp
pattern= ([^ ]*) xntpd\[([0-9]*)\]: synchronisation lost
desc= host_$1_pid_$2_xntp_sync_lost greater than 10 minutes
action= write xntpd.log lost xntp sync on $1 for more than 10 minutes
desc2= host_$1_pid_$2_xntp_sync_lost less than 10 minutes
action2= none
ptype2= regexp
pattern2= ($1) xntpd\[($2)\]: synchronized to
window= 600

# The rest is of interest (unaccounted for messages)
# Handled by default rule: append the whole event line to xntpd.log.
type= single
continue= dontcont
desc= xntp_error_$1
ptype= regexp
pattern= ([^ ]*) xntpd\[([0-9]*)\]:
action= write xntpd.log $0

#######
###
### NTPDATE
###
#######

# Look for ntpdate issues/errors
#
# If step is > .1 second, report
#
#
# Example input:
# Nov 4 12:01:30 corphost ntpdate[16587]: \
# [ID 774510 daemon.notice] step time server 215.37.14.92 offset \
# 29.256618 sec
#
# $1 = host, $2 = signed offset in seconds.
type= single
desc= report large ntpdate corrections for host $1
continue= dontcont
ptype= regexp
context= =(abs($2) > 0.1)
pattern= ([A-Za-z0-9._-]+) ntpdate\[[0-9]+\]:.*step time server.*offset ([-]?[0-9.]+)
action= pipe '$0' /bin/mailx -s "large ntpdate correction($2) on $1" admin

# Steps that did not satisfy the context of the rule above (0.1 s or
# less) are dropped here.
type= suppress
desc= ignore other ntpdate steps.
ptype= regexp
pattern= ([A-Za-z0-9._-]+) ntpdate\[[0-9]+\]:.*step time server.*offset ([-]?[0-9.]+)

# Ignore ntpdate adjustments. These don't indicate a poorly performing
# clock, or a discontinuity in time on the system. If they were large
# changes indicating a problem, the prior rule would have consumed the
# event before we see it.
#
# Example input:
# Nov 4 10:44:36 corphost2.example.com ntpdate[17793]: \
# [ID 558275 daemon.notice] adjust time server 37.137.15.45 \
# offset -0.002750 sec
#
# NOTE(review): no "continue" keyword here - a suppress rule already ends
# processing of the matching event, and the SEC documentation does not
# appear to list "continue" among the Suppress rule fields; confirm
# against the SEC version in use.
type= suppress
desc= ignore ntpdate adjustments for host $1
ptype= regexp
pattern= ([A-Za-z0-9._-]+) ntpdate\[[0-9]+\]:.*adjust time server.*offset ([-]?[0-9.]+)

# If we get here, we must not have handled the event in our ruleset.
type= single
continue= dontcont
ptype= tvalue
pattern= TRUE
desc= Allow event to be processed by other rulesets.
action= delete EVENT_PROCESSED

# publish