# Monitor ntpdate/xntp daemon syslog messages 
#

## standard preamble
# Gate rule: an event that mentions neither "xntpd" nor "ntpdate" matches
# this negated regexp and stops here with no action, so the rules below
# only ever see NTP-related messages.
type= single
ptype= nregexp
pattern= xntpd|ntpdate
continue= dontcont
desc= Eliminate events that aren't processed by this ruleset.
action= none

# If the EVENT_PROCESSED context already exists, stop here without doing
# anything. The context is written in square brackets so that it is
# evaluated before the (always true) pattern.
type= single
context= [EVENT_PROCESSED]
ptype= tvalue
pattern= TRUE
continue= dontcont
desc= See if other ruleset has processed this event
action= none

# Create the EVENT_PROCESSED context for this event, then carry on
# (takenext) to the rules that follow.
type= single
ptype= tvalue
pattern= TRUE
continue= takenext
desc= Stop other rulesets from processing this event
action= create EVENT_PROCESSED

# Report larger than normal clock steps made by the daemon (more than
# 1/4 second at my site). These indicate a bad clock, extreme temperature
# changes, etc.
#
# Example:
# Dec 23 07:34:29 blade61.cs.umb.edu xntpd[313]: \
#      [ID 774427 daemon.notice] time reset (step) 0.770897 s
#
# $1 = host name, $2 = signed step size in seconds; the context admits the
# event only when the absolute step exceeds 0.25.
# The host class is deliberately spelled A-Za-z: the range A-z would also
# accept [ \ ] ^ and the backquote, and $1 is placed inside a double-quoted
# shell argument in the action below.
type= single
desc= report large xntpd corrections for host $1
continue= dontcont
ptype= regexp
context= =(abs($2) > 0.25)
pattern= ([A-Za-z0-9._-]+) xntpd\[[0-9]+\]:.*time reset \(step\) ([-]?[0-9.]+) s
action= pipe '$0' /usr/bin/mailx -s "xntpd: large time change on $1" admin

# Look for shutdown messages that are not followed by a startup
# within 1 minute.
# Example:
# Dec 23 02:27:53 blade61.example.org xntpd[457]: \
#      [ID 866926 daemon.notice] xntpd exiting on signal 15
#
# Dec 23 02:28:28 blade61.cs.umb.edu xntpd[305]: \
#      [ID 798731 daemon.notice] using kernel phase-lock loop 0041
#
# The mail in "action" is sent only if the 60 second window expires without
# the restart message (pattern2) arriving for the same host; if the restart
# is seen in time, action2 merely logs it.
# $1 (the host) is substituted into both the shell command and pattern2,
# so the host class is spelled A-Za-z: the range A-z would also accept
# [ \ ] ^ and the backquote.
type= pairwithwindow
desc= verify restart of xntp daemon on $1
ptype= regexp
pattern= ([A-Za-z0-9._-]+) xntpd\[[0-9]+\]: .* xntpd exiting on signal 15
action= shellcmd /usr/bin/mailx -s "xntpd: server not restarted on $1" admin
ptype2= regexp
pattern2= $1 xntpd\[[0-9]+\]: .* using kernel phase-lock loop 0041
desc2= look for restart xntp on $1
action2= logonly
window= 60

# Ignore startup messages.
# Example:
# Dec 23 02:29:28 blade61.example.org xntpd[305]: \
#   [ID 301315 daemon.notice] tickadj= 5, tick= 10000, \
#   tvu_maxslew= 495, est. hz= 100
#
# The optional group skips the "[ID nnn facility.level] " tag shown in the
# example, so the rule matches whether or not syslog message IDs are
# present in the line.
type= suppress
desc= ignore xntpd startup parameter messages
ptype= regexp
pattern= xntpd\[[0-9]*\]: (?:\[ID [0-9]+ [a-z0-9]+\.[a-z]+\] )?(version|tickadj|precision) *=

# Look for "Previous time adjustment didn't complete" messages. When 3 of
# them arrive for one host within a 10 minute period, report it, because
# it means that time sync may no longer be within parameters.
# Example:
# Dec 23 02:29:28 blade61.example.org xntpd[305]: \
#    Previous time adjustment didn't complete
#
# The host name ($1) is part of desc, so events are counted per host.
# desc is also what %s expands to inside the shell command, hence the host
# class is spelled A-Za-z: the range A-z would also accept [ \ ] ^ and the
# backquote.
# The optional group allows a "[ID nnn facility.level] " tag between the
# process id and the message text.
type= singlewiththreshold
continue= dontcont
desc= xntpd incomplete time adjustment on $1
ptype= regexp
pattern= ([A-Za-z0-9._-]+) xntpd\[[0-9]*\]: (?:\[ID [0-9]+ [a-z0-9]+\.[a-z]+\] )?Previous time adjustment didn't complete
action= shellcmd /usr/bin/mailx -s "excessive %s" admin
thresh= 3
window= 600

# Lost sync. Only important if we don't get a new sync within the next
# 10 minutes.
#
# $1 = host, $2 = daemon pid. Both are substituted into pattern2, so only a
# "synchronized to" message from the same host and pid cancels the report.
# If none arrives within the window, one line is appended to xntpd.log.
# The optional group allows a "[ID nnn facility.level] " tag between the
# process id and the message text.
type= pairwithwindow
continue= dontcont
ptype= regexp
pattern= ([^ ]*) xntpd\[([0-9]*)\]: (?:\[ID [0-9]+ [a-z0-9]+\.[a-z]+\] )?synchronisation lost
desc= host_$1_pid_$2_xntp_sync_lost greater than 10 minutes
action= write xntpd.log lost xntp sync on $1 for more than 10 minutes
desc2= host_$1_pid_$2_xntp_sync_lost less than 10 minutes
action2= none
ptype2= regexp
pattern2= ($1) xntpd\[($2)\]: (?:\[ID [0-9]+ [a-z0-9]+\.[a-z]+\] )?synchronized to
window= 600

# Default rule: every xntpd message that no earlier rule consumed is of
# interest (unaccounted for), so the whole line is appended to xntpd.log.
type= single
ptype= regexp
pattern= ([^ ]*) xntpd\[([0-9]*)\]:
continue= dontcont
desc= xntp_error_$1
action= write xntpd.log $0

#######
###
### NTPDATE
###
#######
# Look for ntpdate issues and errors.
#
#  If step is > .1 second, report
#
#
# Example input:
# Nov  4 12:01:30 corphost ntpdate[16587]: \
#  [ID 774510 daemon.notice] step time server 215.37.14.92 offset \
#  29.256618 sec
#
# $1 = host name, $2 = signed offset in seconds.
# The host class is deliberately spelled A-Za-z: the range A-z would also
# accept [ \ ] ^ and the backquote, and $1 is placed inside a double-quoted
# shell argument in the action below.
type= single
desc= report large ntpdate corrections for host $1
continue= dontcont
ptype= regexp
context= =(abs($2) > 0.1)
pattern= ([A-Za-z0-9._-]+) ntpdate\[[0-9]+\]:.*step time server.*offset ([-]?[0-9.]+)
action= pipe '$0' /usr/bin/mailx -s \
                      "large ntpdate correction($2) on $1" admin

# Drop the remaining ntpdate step messages silently: steps larger than
# 0.1 s were already consumed by the reporting rule above.
# The host class is spelled A-Za-z, since the range A-z would also accept
# [ \ ] ^ and the backquote.
type= suppress
desc= ignore other ntpdate steps.
ptype= regexp
pattern= ([A-Za-z0-9._-]+) ntpdate\[[0-9]+\]:.*step time server.*offset ([-]?[0-9.]+)

# Ignore ntpdate adjustments. These don't indicate a poorly performing
# clock, or a discontinuity in time on the system. If they were large
# changes indicating a problem, the prior rule would have consumed the
# event before we see it.
#
# Example input:
# Nov  4 10:44:36 corphost2.example.com ntpdate[17793]: \
#    [ID 558275 daemon.notice] adjust time server 37.137.15.45 \
#    offset -0.002750 sec
#
# A suppress rule always ends processing of a matching event and has no
# "continue" field, so none is given here.
# The host class is spelled A-Za-z, since the range A-z would also accept
# [ \ ] ^ and the backquote.
type= suppress
desc= ignore ntpdate adjustments for host $1
ptype= regexp
pattern= ([A-Za-z0-9._-]+) ntpdate\[[0-9]+\]:.*adjust time server.*offset ([-]?[0-9.]+)

# Reaching this rule means nothing above consumed the event, so remove the
# EVENT_PROCESSED context again and let other rulesets look at it.
type= single
ptype= tvalue
pattern= TRUE
continue= dontcont
desc= Allow event to be processed by other rulesets.
action= delete EVENT_PROCESSED

# publish