#######
###
### Analyze Reboot Sequence
###
#######
#
# Analyze a reboot sequence.
# Depending on how the system is rebooted we may have multiple
# entry points.
# We use a series of pair rules to save and suppress
# reboot data until we see the SunOS banner. We use the trick
# in the paper of triggering three actions:
#   on initial event receipt, on second event receipt, and on timeout
#   if the second event is not received.
# Note that this ruleset needs to come near the beginning of the rules
# and before rules that handle the syslog down/up events.

## standard preamble
# The usual filter rule below is skipped (left commented out) because
# the events we are looking at are clustered in time rather than in
# "space" or text content. So we run the first few rules of this
# ruleset over all syslog entry lines.
#
#type=single
#continue= dontcont
#ptype= nregexp
#pattern= HEARTBEAT|SEC_
#desc= Eliminate events that aren't processed by this ruleset.
#action= none

# If the EVENT_PROCESSED context already exists, stop processing this
# event in this ruleset.
type= single
continue= dontcont
ptype= tvalue
pattern= TRUE
desc= See if other ruleset has processed this event
action= none
context= [EVENT_PROCESSED]

# Otherwise create the marker and keep going. The last rule of this
# file deletes it again if no rule in between consumed the event.
type= single
continue= takenext
ptype= tvalue
pattern= TRUE
desc= Stop other rulesets from processing this event
action= create EVENT_PROCESSED

# This looks like it will start most reboot sequences.
#   action : arm a 120 second timer context (pending_reboot_<host>) and
#            start collecting lines in reboot_detected_<host>. If the
#            timer expires, the collected lines are mailed, the
#            collection is deleted and this pair operation is reset.
#   action2: the SunOS banner arrived for the same host; restart the
#            timer at 300 seconds with a "failed reboot" report and
#            inject DETECT_REBOOT_EVENTS <host> for the stage rules.
type=pair
desc=detect start of reboot sequence for $1
ptype = regexp
pattern = ([.\w]+) pseudo: \[ID 129642 kern.info\] pseudo-device: tod0
context= !reboot_detected_$1
action = create pending_reboot_$1 120 (report reboot_detected_$1 \
             /bin/mailx -s "Errors without reboot detected on $1" \
             admin; \
             delete reboot_detected_$1 ; \
             reset %s); \
         add reboot_detected_$1 $0
desc2 = confirmed that we have reboot for %1
ptype2 = regexp
pattern2 = $1 genunix: \[ID 540533 kern.notice\] \^MSunOS Release [0-9.]+ Version .* 64-bit
action2 = set pending_reboot_%1 300 (report reboot_detected_%1 \
             /bin/mailx -s "Failed reboot (norm. start) after 5 minutes on %1" \
             admin; \
             delete reboot_detected_%1; ); \
          add reboot_detected_%1 $0 ; \
          event 0 DETECT_REBOOT_EVENTS %1

# Late start: the SunOS banner is the first reboot line seen for the
# host (no pending pair operation consumed it).
# Use "create" rather than "set": on this path no earlier rule has
# created pending_reboot_<host>, and "set" only changes an existing
# context, so the 5 minute failure timer would never be armed.
type = single
desc = detect reboots starting with SunOS banner, host $1
ptype=regexp
pattern = ([.\w]+) genunix: \[ID 540533 kern.notice\] \^MSunOS Release [0-9.]+ Version .* 64-bit
action = create pending_reboot_$1 300 (report reboot_detected_$1 \
             /bin/mailx -s "Failed reboot (late start) after 5 minutes on $1" \
             admin; \
             delete reboot_detected_$1; ); \
         add reboot_detected_$1 $0 ; \
         event 0 DETECT_REBOOT_EVENTS $1

# Rules below this point match events only if reboot_detected_<host>
# exists. Capture other reboot events once we have determined that
# they are part of a reboot.

# Match the date stamp and hostname of a syslog line.
# The event is not handled by this ruleset, so remove the
# EVENT_PROCESSED marker created by the preamble before stopping;
# leaving it set would make it look as if this ruleset had consumed
# every line from a host that is not rebooting.
type=single
desc = ignore all rules below if not in reboot for $1.
ptype=regexp
pattern = ^[A-Z][a-z][a-z] +[0-9]+ [0-9:]+ ([.\w]+)
context = ! reboot_detected_$1
action = delete EVENT_PROCESSED

# Trigger on the event generated when the SunOS boot banner is seen and
# verify that the key components of the reboot are occurring.

# Check ethernet id. action2 creates the interlocking context
# ethernet_address_detected_<host>, which could use an action on
# expiration.
#

# Stage 1: ethernet address.
# Started by the synthetic DETECT_REBOOT_EVENTS <host> event. If the
# ethernet address line for that host is not seen within the window,
# the collected reboot lines are mailed. On success a short-lived
# interlock context is created for the next stage.
# pattern2 is bound to the host of the first event ($1 in pattern2,
# %1 in context2/action2), as in the rules below, so a line from some
# other host cannot satisfy this host's operation.
type=pairwithwindow
desc = look for reboot events ethernet $1
ptype = regexp
pattern = ^DETECT_REBOOT_EVENTS ([.\w]+)$
continue=takenext
action = report reboot_detected_$1 \
             /bin/mailx -s \
             "Failed to detect Ethernet address after 10 seconds on $1" \
             admin
desc2 = detect ethernet address
context2 = reboot_detected_%1
ptype2=regexp
pattern2 = $1 genunix: \[ID 678236 kern.info\] Ethernet address = [0-9a-f:]+
action2 = create ethernet_address_detected_%1 30; \
          add reboot_detected_%1 $0
window = 10

# Stage 2: memory size. Only accepted while the ethernet interlock
# context from stage 1 exists; swaps it for memory_detected_<host>.
type=pairwithwindow
desc = look for reboot events memory $1
ptype = regexp
pattern = ^DETECT_REBOOT_EVENTS ([.\w]+)$
continue=takenext
action = report reboot_detected_$1 \
             /bin/mailx -s \
             "Failed to locate memory detection after 10 seconds on $1" \
             admin
desc2 = detect memory size
context2 = reboot_detected_%1 && ethernet_address_detected_%1
ptype2=regexp
pattern2 = $1 unix: \[ID 389951 kern.info\] mem = [0-9]+K
action2 = delete ethernet_address_detected_%1; \
          create memory_detected_%1 30; \
          add reboot_detected_%1 $0
window = 10

# Stage 3: network link up at full duplex. Only accepted while the
# memory interlock context from stage 2 exists; swaps it for
# full_duplex_up_detected_<host>. The window for this stage is 60
# seconds, and the failure report says so.
type=pairwithwindow
desc = look for reboot events full-duplex $1
ptype = regexp
pattern = ^DETECT_REBOOT_EVENTS ([.\w]+)$
continue=takenext
action = report reboot_detected_$1 \
             /bin/mailx -s \
             "Failed to detect full-duplex link up after 60 seconds on $1" \
             admin
desc2 = detect full-duplex link up
context2 = reboot_detected_%1 && memory_detected_%1
ptype2=regexp
pattern2 = $1 (hme|eri|bge): .* 1000? Mbps Full-Duplex Link Up
action2 = delete memory_detected_%1; \
          create full_duplex_up_detected_%1 30 ;\
          add reboot_detected_%1 $0
window = 60

# Try to find the last element of the hardware boot sequence.
# When we find it, kill all the contexts since
# we have completed the reboot. A better idea is to set up an rc script
# S99doneRebooting that uses syslog to generate a unique string once
# all services etc. are up.
# Final stage: the fd0 line marks the end of the hardware boot sequence.
# Started by DETECT_REBOOT_EVENTS <host>. If the fd0 line for that host
# does not show up within 60 seconds, the collected reboot lines are
# mailed. If it does (and the full-duplex interlock context exists),
# every context belonging to this reboot is removed and the success is
# logged via logonly.
type=pairwithwindow
ptype=regexp
pattern=^DETECT_REBOOT_EVENTS ([.\w]+)$
desc=look for reboot events fd/end reboot $1
action=report reboot_detected_$1 \
           /bin/mailx -s \
           "Failed to detect fd drive/complete reboot on $1" \
           admin
ptype2=regexp
pattern2=$1 genunix: \[ID 936769 kern.info\] fd0
context2=reboot_detected_%1 && full_duplex_up_detected_%1
desc2=Successful reboot for %1
action2=delete full_duplex_up_detected_%1; \
        delete pending_reboot_%1; \
        delete reboot_detected_%1; \
        logonly
window=60

# While a reboot is being tracked for a host, append any pseudo: or
# genunix: line from that host to its reboot_detected_<host> context.
type=single
ptype=regexp
pattern=([.\w]+) (pseudo:|genunix:)
context=reboot_detected_$1
desc=capture reboot events pseudo/genunix for $1
action=add reboot_detected_$1 $0

# Likewise keep the syslogd shutdown line while a reboot is tracked.
type=single
ptype=regexp
pattern=([.\w]+) syslogd: going down on signal 15
context=reboot_detected_$1
desc=capture reboot events syslogd for $1
action=add reboot_detected_$1 $0

# Reaching this rule means no rule above consumed the event, so the
# EVENT_PROCESSED marker is removed again.
type=single
continue=dontcont
ptype=tvalue
pattern=TRUE
desc=unset EVENT_PROCESSED
action=delete EVENT_PROCESSED