#!/usr/bin/perl -w
############################################################
# $Id: psmon,v 1.8 2004/08/01 12:19:04 nicolaw Exp $
# psmon - Process Table Monitor Script
# Copyright: (c)2002,2003,2004 Nicola Worthington. All rights reserved.
############################################################
# This file is part of psmon.
#
# psmon is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# psmon is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with psmon; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
############################################################

=pod

=head1 NAME

psmon - Process Table Monitoring Script

=head1 VERSION

$Id: psmon,v 1.8 2004/08/01 12:19:04 nicolaw Exp $

=head1 SYNOPSIS

Single user account crontab operation.

    # DO NOT EDIT THIS FILE - edit the master and reinstall.
    # (/tmp/crontab.28945 installed on Wed Jan  8 16:29:24 2003)
    # (Cron version -- $Id: psmon,v 1.8 2004/08/01 12:19:04 nicolaw Exp $)
    MAILTO="nicolaworthington@msn.com"
    USER=nicolaw
     
    */5 * * * *    /sbin/psmon --daemon --cron --conf=~/etc/psmon.conf --user=$USER --adminemail=nicolaworthington@msn.com

Regular system-wide call from cron:

    */5 * * * *    /sbin/psmon --daemon --cron

Only check processes during working office hours:

    * 9-17 * * *   /sbin/psmon

Command line syntax.

 [nicolaw@nicolaw]$ psmon --help
 Syntax: psmon [--conf=filename] [--daemon] [--cron] [--user=user]
               [--adminemail=emailaddress] [--dryrun] [--help] [--version]
    --help            Display this help
    --version         Display full version information
    --dryrun          Dryrun (do not actually kill or spawn and processes)
    --daemon          Spawn in to background daemon
    --cron            Disables 'already running' errors with the --daemon option
    --conf=str        Specify alternative config filename
    --user=str        Only scan the process table for processes running as str
    --adminemail=str  Force all notification emails to be sent to str

=head1 DESCRIPTION

This script monitors the process table using Proc::ProcessTable, and
will respawn or kill processes based on a set of rules defined in an
Apache style configuration file.

Processes will be respawned if a spawn command is defined for a process,
and no occurances of that process are running. If the --user command line
option is specified, then the process will only be spawned if no instances
are running as the specified userid.

Processes can be killed off if they have been running for too long,
use too much CPU or memory resources, or have too many concurrent
versions running. Exceptions can be made to kill rulesets using the
I<pidfile> and I<lastsafepid> directives.

If a PID file is declared for a process, psmon will never kill the
process ID that is contained within the pid file. This is useful if for
example, you have a script which spawns hundreds of child processes
which you may need to automatically kill, but you do not want to kill
the parent process.

Any actions performed will be logged to the DAEMON syslog facility by default.
There is support to optionally also send notifications emails to an
administrator on a global or pre-rule basis.

=head1 OPERATION

=over 4

=item --dryrun

Execute a dry-run (do not actually kill or spawn and processes).

=item --conf=I<filename>

Specify alternative config filename.

=item --daemon

Spawn in to background daemon.

=item --cron

Disables already running warnings when trying to launch as another daemon.

=item --user=I<user>

Only scan the process table for processes running under this username.

=item --adminemail=I<emailaddress>

Force all notification emails to be sent to this email address.

=back

=head1 PREREQUISITES

In addition to Perl 5.005_03 or higher, the following Perl modules are
required:

    Getopt::Long
    Config::General
    POSIX
    Proc::ProcessTable
    Net::SMTP
    Unix::Syslog

=head1 INSTALLATION

The POSIX module is usually supplied with Perl as standard, as is
Getopt::Long. All these modules can be obtained from CPAN. Visit
http://search.span.org and http://www.cpan.org for further details.
For the lazy people reading this, you can try the
following command to install these modules:

    for m in Getopt::Long Config::General POSIX Proc::ProcessTable \
	 Net::SMTP Unix::Syslog;do perl -MCPAN -e"install $m";done

Alternatively you can run the install.sh script which comes in the
distribution tarball. It will attempt to install the right modules,
install the script and configuration file, and generate UNIX man page
documentation.

By default psmon will look for its runtime configuration in /etc/psmon.conf,
although this can be defined as otherwise from the command line. For system
wide installations it is reccomended that you install your psmon in to the
default location.

=cut



BEGIN {
	package psmon;
        use strict;
        sub try_to_load {
		my %self = @_;
                my $failure = '';
                while (my ($module,$args) = each %self) {
                        eval("use $module $args;");
                        $failure .= "Fatal error while trying to load Perl module '$module'\n" if $@;
                }
                (warn $failure) && exit 8 if $failure;
        }

        try_to_load(	'English'		=> '',
        		'Getopt::Long'		=> '',
        		'Config::General'	=> '',
        		'POSIX'			=> 'qw(uname getcwd)',
        		'Net::SMTP'		=> '',
        		'Unix::Syslog'		=> 'qw(:subs :macros)',
			'Proc::ProcessTable'	=> '',	);
}

# Define constants
use constant DEBUG      => 1;  # This should be reset back to 0 for public releases
use constant PREFIX	=> ''; # You may want to set this to /home/joeb or something
use constant HOSTNAME   => (POSIX::uname())[1];

# Declare global package variables
use subs qw(report loglevel logfacility log alert);
use vars qw($VERSION $SELF %O %C); # I want to move %O, and %C out of global space

# These English variables and globals are okay to stay
$WARNING		= 1;
$OUTPUT_AUTOFLUSH	= 1;
$ENV{PATH}		= '/bin:/usr/bin:/usr/local/bin';
($SELF = $PROGRAM_NAME)	=~ s|^.*/||;
$VERSION = sprintf "%d.%03d", q$Revision: 1.8 $ =~ /(\d+)/g;

# Get command line options
%O = (	conf		=> PREFIX.'/etc/psmon.conf',
	default_conf	=> PREFIX.'/etc/psmon.conf' );
GetOptions(\%O, 'help', 'version',
		'daemon', 'cron', 'dryrun',
		'conf=s', 'user=s', 'adminemail=s' );

# Display help or version info and exit if required
display_help(0) if exists $O{help};
display_version(0) if exists $O{version};

# Open syslog with PERROR (output to terminal)
openlog $SELF, LOG_PID | LOG_PERROR, logfacility;

# Bork if we've been told to query for an invalid user
if (exists $O{user} && (my $name = $O{user})) { $O{user} = scalar getpwnam($O{user}) || '';
	unless ($O{user}) {
		log LOG_CRIT, "Invalid user specified: '$name'";
		exit 2;
	}
}




=pod

=head1 CONFIGURATION

The default configuration file location is /etc/psmon.conf. A different
configuration file can be declared from the command line.

Syntax of the configuration file is based upon that which is used by
Apache. Each process to be monitored is declared with a Process scope
directive like this example which monitors the OpenSSH daemon:

    <Process sshd>
        spawncmd    /sbin/service sshd start
        pidfile     /var/run/sshd.pid
        instances   50
        pctcpu      90
    </Process>

There is a special I<*> process scope which applies to I<all> running
processes. This special scope should be used with extreme care. It does
not support the use of the I<spawncmd>, I<pidfile>, I<instances> or I<ttl>
directivers. A typical example of this scope might be as follows:

    <Process *>
        pctcpu    95
        pctmem    80
    </Process>

Global directives which are not specific to any one process should be placed
outside of any Process scopes.

=head2 DIRECTIVES

=over 4

=item Facility

Defines which syslog facility to log to. Valid options are as follows;
LOG_KERN, LOG_USER, LOG_MAIL, LOG_DAEMON, LOG_AUTH, LOG_SYSLOG, LOG_LPR,
LOG_NEWS, LOG_UUCP, LOG_CRON, LOG_LOCAL0, LOG_LOCAL1, LOG_LOCAL2,
LOG_LOCAL3, LOG_LOCAL4, LOG_LOCAL5, LOG_LOCAL6 and LOG_LOCAL7. Defaults
to LOG_DAEMON.

=item LogLevel

Defines the loglevel priority that notifications to syslog will be
marked as. Valid options are as follows; LOG_EMERG, LOG_ALERT, LOG_CRIT,
LOG_ERR, LOG_WARNING, LOG_NOTICE, LOG_INFO and LOG_DEBUG. The log level
used by a notification for any failed action will automatically be
raised to the next level in order to highlight the failure. May be also be used
in a Process scope which will take priority over a global declaration.
Defaults to LOG_NOTICE.

=item KillLogLevel (previously KillPIDLogLevel)

The same as the loglevel directive, but only applies to process kill actions.
Takes priority over the loglevel directive. May be also be used in a
Process scope which will take priority over a global declaration.
Undefined by default.

=item SpawnLogLevel

The same as the loglevel directive, but only applies to process spawn actions.
Takes priority over the loglevel directive. May be also be used in a
Process scope which will take priority over a global declaration.
Undefined by default.

=item AdminEmail

Defines the email address where notification emails should be sent to.
May be also be used in a Process scope which will take priority over a
global declaration. Defaults to root@localhost.

=item NotifyEmailFrom

Defines the email address that notification email should be addresses
from. Defaults to <username>@I<hostname>.

=item Frequency

Defines the frequency of process table queries. Defaults to 60 seconds.

=item LastSafePID

When defined, psmon will never attempt to kill a process ID which is
numerically less than or equal to the value defined by lastsafepid. It
should be noted that psmon will never attempt to kill itself, or a process ID
less than or equal to 1. Defaults to 100.

=item ProtectSafePIDsQuietly

Accepts a boolean value of On or Off. Surpresses all notifications of
preserved process IDs when used in conjunction with the I<lastsafepid>
directive. Defaults to Off.

=item SMTPHost

Defines the IP address or hostname of the SMTP server to used to send
email notifications. Defaults to localhost.

=item SMTPTimeout

Defines the timeout in seconds to be used during SMTP connections.
Defaults to 20 seconds.

=item SendmailCmd

Defines the sendmail command to use to send notification emails if there
is a failure with the SMTP connection to the host defined by smtphost.
Defaults to '/usr/sbin/sendmail -t'.

=item Dryrun

Forces this psmon to as if the --dryrun command line switch had specified.
This is useful if you want to force a specific configuration file to only
report and never actually take any automated action.

=item NotifyDetail

Defines the verbosity of notification emails which are sent. Can be set
to 'Simple', 'Verbose' or 'Debug'. Defaults to 'Verbose'. This function
will be removed soon. It is unnecessary bloat and is not very portable.

=back

=head2 PROCESS SCOPE DIRECTIVES

=over 4

=item SpawnCmd

Defines the full command line to be executed in order to respawn a dead
process.

=item KillCmd

Defines the full command line to be executed in order to gracefully
shutdown or kill a rogue process. If the command returns a boolean true
exit status then it is assumed that the command failed to execute
sucessfully. If no KillCmd is specified or the command fails, the
process will be killed by sending a SIGKILL signal with the standard
kill() function. Undefined by default.

=item PIDFile

Defines the full path and filename of a file created by a process which
contain it's main parent process ID.

=item TTL

Defines a maximum time to live (in seconds) of a process. The process
will be killed once it has been running longer than this value, and
it's process ID isn't contained in the defined pidfile.

=item PctCpu

Defines a maximum allowable percentage of CPU time a process may use.
The process will be killed once it's CPU usage exceeds this threshold
and it's process ID isn't contained in the defined pidfile.

=item PctMem

Defines a maximum allowable percentage of total system memory a process
may use. The process will be killed once it's memory usage exceeds this
threshold and it's process ID isn't contained in the defined pidfile.

=item Instances

Defines a maximum number of instances of a process which may run. The
process will be killed once there are more than this number of occurances
running, and it's process ID isn't contained in the defined pid file.

=item NoEmailOnKill

Accepts a boolean value of True or False. Surpresses process killing
notification emails for this process scope. Defaults to False.

=item NoEmailOnSpawn

Accepts a boolean value of True or False. Surpresses process spawning
notification emails for this process scope. Defaults to False.

=item NoEmail

Accepts a boolean value of True or False. Surpresses all notification
emails for this process scope. Defaults to False.

=item NeverKillPID

Accepts a space delimited list of PIDs which will never be killed.
Defaults to 1.

=item NeverKillProcessName

Accepts a space deliomited list of process names which will never be
killed. Defaults to 'kswapd kupdated mdrecoveryd'.

=back

=head2 EXAMPLES

    <Process syslogd>
        spawncmd       /sbin/service syslogd restart
        pidfile        /var/run/syslogd.pid
        instances      1
        pctcpu         70
        pctmem         30
    </Process>

Syslog is a good example of a process which can get a little full
of itself under certian circumstances, and excessively hog CPU and
memory. Here we will kill off syslogd processes if it exceeds 70%
CPU or 30% memory utilization.

Older running copies of syslogd will be killed if they are running,
while leaving the most recently spawned copy which will be listed in
the PID file defined.

    <Process httpd>
        spawncmd      /sbin/service httpd restart
        pidfile       /var/run/httpd.pid
        loglevel      critical
        adminemail    pager@noc.company.com
    </Process>

Here we are monitoring Apache to ensure that it is restarted if
it dies. The pidfile directive in this example is actually
redundant because we have not defined any rule where we should
consider killing any httpd processes.

All notifications relating to this process will be logged with the
syslog priority of critical (LOG_CRIT), and all emailed to
pager@noc.company.com which could typically forward to a pager.

Any failed attempts to kill or restart a process will automatically
be logged as a syslog priority one level higher than that specified.
If a restart of Apache were to fail in this example, a wall
notification would be broadcast to all interactive terminals
connected to the machine, since the next log priority up from
LOG_CRIT is LOG_EMERG.

    <Process find>
        noemail    True
        ttl        3600
    </Process>

Kill old find processes which have been running for over an hour.
Do not send an email notification since it's not too important.

=cut




# Read the config file and setup signal handlers
%C = read_config($O{conf});
$O{dryrun} = 1 if $C{dryrun};
if ($C{disabled}) {
	log LOG_CRIT, "Your configuration file '$O{conf}' is disabled. Remove the 'Disabled True' directive from the file.";
	exit 3;
}



=pod

=head1 SIGNALS

=over 4

=item HUP

Forces an immediate reload of the configuration file. You should
send the HUP signal when you are running psmon as a background
daemon and have altered the psmon.conf file.

=item USR1

Forces an immediate scan of the process table.

=back

=head1 EXIT CODES

=over 4

=item Value 0: Exited gracefully

The program exited gracefully.

=item Value 2: Failure to lookup UID for username

The username specified by the --user command line option did not resolve to a valid
UID.

=item Value 3: Configuration file is disabled

The configuration file is disabled. (It contains an active 'Disabled' directive).

=item Value 4: Configuration file does not exist

The specified configuration file, (default or user specified) does not exist.

=item Value 5: Unable to open PID file handle

Failed to open a read-only file handle for the runtime PID file.

=item Value 6: Failed to fork

An error occured while attempting to fork the child background daemon process.

=item Value 7: Unable to open PID file handle

Failed to open a write file handle for the runtime PID file.

=item Value 8: Failure to load Perl module

One or more Perl module could not be loaded. This usually happens when one of the
required Perl modules which psmon depends upon is not installed or could not be located
in the Perl LIB search path.

=back

=head1 PERFORMANCE

psmon is not especially fast. Much of it's time is spent reading the process table.
If the process table is particularly large this can take a number of seconds.
Although is rarely a major problem on todays speedy machines, I have run a few tests
so you take look at the times and decide if you can afford the wait.

 CPU             OS              Open Files/Procs    1m Load    Real Time
 PIII 1.1G       Mandrake 9.0         10148 / 267       0.01     0m0.430s
 PIII 1.2G       Mandrake 9.0         16714 / 304       0.44     0m0.640s
 Celeron 500     Red Hat 6.1           1780 /  81       1.27     0m0.880s
 PII 450         Red Hat 6.0            300 /  23       0.01     0m1.050s
 2x Xeon 1.8G    Mandrake 9.0         90530 / 750       0.38     0m1.130s
 Celeron 500     Red Hat 6.1           1517 /  77       1.00     0m1.450s
 PIII 866        Red Hat 8.0           3769 /  76       0.63     0m1.662s
 PIII 750        Red Hat 6.2            754 /  35       3.50     0m2.170s

(Figures are accurate as of release 1.0.3).

These production machines were running the latest patched stock distribution kernels.
I have listed the total number of open file descriptors, processes running and 1 minute
load average to give you a slightly better context of the performance.

=cut

# Run a single check
unless (exists $O{daemon}) {
	# Reopen syslog without PERROR (no output to terminal)
	closelog;
	openlog $SELF, LOG_PID, logfacility $C{facility};

	# Run a single check
	check_processtable(exists $O{user} ? $O{user} : '');

# Run as a daemon
} else {
	# Read the config file and setup signal handlers
	$SIG{'HUP'} = sub {
			log LOG_NOTICE, 'Recieved SIGHUP; reloading configuration';
			%C = read_config($O{conf});
		};
	$SIG{'USR1'} = sub {
			log LOG_NOTICE, 'Recieved SIGUSR1; checking process table immediately';
			check_processtable(exists $O{user} ? $O{user} : '');
		};

	# Figure out the PID file name
	my ($x,$y) = (POSIX::getcwd.$O{conf},0);
	for (0..length($x)-1) { $y += ord substr($x,$_,1); }
	my $pidfile = sprintf("%s/var/run/%s%s.pid", PREFIX,
	                        $SELF,$O{conf} eq $O{default_conf}
	                        ? '' : ".$y"
	                );

	#if (DEBUG) {
	#	print "\$O{conf} = $O{conf}\n";
	#	print "\$O{default_conf} = $O{default_conf}\n";
	#	print "\$pidfile = $pidfile\n";
	#}

	# Launch in to the background
	daemonize($pidfile);

	# Reopen syslog without PERROR (no output to terminal)
	closelog;
	openlog $SELF, LOG_PID, logfacility $C{facility};

	# Die if you remove the runtime PID file 
	while (-f $pidfile) {
		check_processtable(exists $O{user} ? $O{user} : '');
		sleep $C{frequency};
	}
}

# Finish
log LOG_NOTICE, "Terminating.\n";
closelog;
exit;





########################################
# User subroutines

=pod

=head1 SUBROUTINES

=over 4

=item check_processtable()

Reads the current process table, checks and then executes any appropriate
action to be taken. Does not accept any paramaters.

=cut

sub check_processtable {
	my $uid = shift;

	# Slurp in the process table
	my %proc;
	my $t = new Proc::ProcessTable;
	foreach my $p (@{$t->table}) {
		## Be sad and use the process table to find out if we have a
		## tty attached to this process
		#if ($p->{pid} == $$ && $p->{ttynum}) {
		#	%A = (
		#		MOVE_TO_COL => "\033[60G",
		#		SUCCESS => "\033[1;32m",
		#		FAILURE => "\033[1;31m",
		#		WARNING => "\033[1;33m",
		#		NORMAL => "\033[0;39m"
		#	);
		#}

		# Only grab information on processes we have rules for
		next unless $C{process}->{'*'} || $C{process}->{$p->{fname}};

		# Skip processes that don't belong to the specified UID if applicable
		next if $uid && $p->{uid} != $uid;

		my $i = !exists $proc{$p->{fname}} ? 0 : @{$proc{$p->{fname}}};
		$proc{$p->{fname}}->[$i] = {
				pid	=> $p->{pid},
				ppid	=> $p->{ppid},
				fname	=> $p->{fname},
				tty	=> $p->{ttynum},
				start	=> $p->{start},	
				pctcpu	=> isnumeric($p->{pctcpu}) || 0,
				pctmem	=> isnumeric($p->{pctmem}) || 0,
			};
	}
	undef $t;

	#if (DEBUG) {
	#	use Data::Dumper;
	#	print Dumper(\%proc);
	#}

	# Build a list of bad naughty processes
	my %slay;
	foreach my $process (keys %{$C{process}}) {
		next unless exists $proc{$process} || $process eq '*';

		#if (DEBUG) {
		#	print "Checking $process ... \n";
		#	use Data::Dumper;
		#	print Dumper($C{process}->{$process});
		#}

		foreach my $p (@{$proc{$process}}) {
			# Too many instances running
			if ($C{process}->{$process}->{instances} && @{$proc{$process}} > $C{process}->{$process}->{instances}) {
				push @{$slay{$process}}, {
						pid	=> $p->{pid},
						cause	=> 'instances',
						reason	=> sprintf("%d instances exceeds limit of %d",
								scalar @{$proc{$process}},
								$C{process}->{$process}->{instances})
					}
			}

			# Exceeded TTL
			if ($C{process}->{$process}->{ttl} && time() - $p->{start} > $C{process}->{$process}->{ttl}) {
				push @{$slay{$process}}, {
						pid	=> $p->{pid},
						cause	=> 'ttl',
						reason	=> sprintf("%d exceeds TTL of %d",
								time() - $p->{start},
								$C{process}->{$process}->{ttl})
					}
			} 

			# Check CPU and Memory usage
			pctcheck($process,$p,\%slay);
		}
	}

	# Check CPU and Memory usage for *ALL* processes
	if ($C{process}->{'*'}) {
		while (my ($process,$proclist) = each %proc) {
			for my $p (@{$proclist}) {
				pctcheck($process,$p,\%slay,'*');
			}
		}
	}

	# Check CPU and Memory usage
	sub pctcheck {
		my ($process,$p,$slayref,$scope) = @_;
		$scope ||= $process;

		# Exceeded CPU Percent
		$C{process}->{$scope}->{pctcpu} = isnumeric($C{process}->{$scope}->{pctcpu});
		if ($C{process}->{$scope}->{pctcpu} && $p->{pctcpu} > $C{process}->{$scope}->{pctcpu}) {
			push @{$slayref->{$process}}, {
					pid	=> $p->{pid},
					cause	=> 'pctcpu',
					reason	=> sprintf("%.2f%% CPU usage exceeds limit of %.2f%%",
							$p->{pctcpu},
							$C{process}->{$scope}->{pctcpu})
				}
		}

		# Exceeded Memory Percent
		$C{process}->{$scope}->{pctmem} = isnumeric($C{process}->{$scope}->{pctmem});
		if ($C{process}->{$scope}->{pctmem} && $p->{pctmem} > $C{process}->{$scope}->{pctmem}) {
			push @{$slayref->{$process}}, {
					pid	=> $p->{pid},
					cause	=> 'pctmem',
					reason	=> sprintf("%.2f%% memory usage exceeds limit of %.2f%%",
							$p->{pctmem},
							$C{process}->{$scope}->{pctmem})
				}
		}
	}

	# Kill naughty processes
	while (my ($process,$aryref) = each %slay) {
		# Decide what loglevel we should report the action as
		my $loglevel = loglevel($C{process}->{$process}->{killloglevel} ||
					$C{process}->{$process}->{loglevel} ||
					$C{killloglevel} || $C{loglevel} || LOG_NOTICE);

		# Protect safe process IDs
		if ($C{process}->{$process}->{pidfile} && !$C{process}->{$process}->{ppid}) {
			if (-e $C{process}->{$process}->{pidfile} && open(FH,$C{process}->{$process}->{pidfile})) {
				$C{process}->{$process}->{ppid} = <FH>;
				chomp $C{process}->{$process}->{ppid};
				close(FH);
			}
		}
		my $ppid = $C{process}->{$process}->{ppid} || 0;

		# See about slaying each of these process instances
		foreach my $slayref (@{$aryref}) {
			next if    $slayref->{pid} == $ppid
				|| $slayref->{pid} == $$
				|| $slayref->{pid} <= 1
				|| $C{neverkillpid} =~ /\b$slayref->{pid}\b/
				|| $C{neverkillprocessname} =~ /(^|\s+)$process(\s+|$)/;

			# Define who to mail alerts to
			my $mailto = ($C{process}->{$process}->{noemailonkill} || $C{process}->{$process}->{noemail}) ? '' : 
					$C{process}->{$process}->{adminemail} || $C{notifyemail};

			# Try to slay the process
			slay_process($process, $loglevel, $mailto, $slayref,
				exists $C{process}->{$process}->{killcmd} ? $C{process}->{$process}->{killcmd} : '');
		}
	}

	# Spawn any dead processes
	foreach my $process (keys %{$C{process}}) {
		# Only attempt to spawn a process if there are no current instances, and there is a spawncmd directive defined
		if (!exists $proc{$process} && exists $C{process}->{$process}->{spawncmd}) {

			# Decide what loglevel we should report the action as
			my $loglevel = loglevel($C{process}->{$process}->{spawnloglevel} ||
						$C{process}->{$process}->{loglevel} ||
						$C{spawnloglevel} || $C{loglevel} || LOG_NOTICE);

			# Define who to mail alerts to
			my $mailto = ($C{process}->{$process}->{noemailonspawn} || $C{process}->{$process}->{noemail}) ? '' : 
					$C{process}->{$process}->{adminemail} || $C{notifyemail};

			# Try to spawn the process
			spawn_process($process, $loglevel, $mailto, $C{process}->{$process}->{spawncmd});
		}
	}

	# Explicitly nuke it for the paranoid (yes I know it's a locally scoped lexical!) ;-)
	undef %proc;
}

=pod

=item slay_process()

Attempts to kill a process with it's killcmd, or failing that using the kill() function.
Accepts the process name, syslog log level, email notification to address and a reference
to the %slay hash.

=cut

# Type to slay a process
sub slay_process {
	my ($process, $loglevel, $mailto, $slayref, $cmd) = @_;

	# Protect safe processes
	if ($slayref->{pid} <= $C{lastsafepid} && !$C{protectsafepidsquietly}) {
		print_init_style("Saving PID $slayref->{pid} ($process) from death",'OK');
		alert $loglevel, $mailto, "Saved safe PID $slayref->{pid} ($process) from death";

	# This process is not protected
	} else { 
		print_init_style("Killing PID $slayref->{pid} ($process)");

		my $cmdrtn = $cmd && !exists $O{dryrun} ? system("$cmd > /dev/null 2>&1") : 0;
		if ($cmd) { # Tried to stop with the killcmd directive 
 			if ($cmdrtn) {
				print_init_style('FAILED');
				alert $loglevel-1, $mailto, "Failed to execute '$cmd' to kill PID $slayref->{pid} ($process)";
			} else {
				print_init_style('OK');
				alert $loglevel, $mailto, "Executed '$cmd' to kill PID $slayref->{pid} ($process)";
			}
		}

		# Don't try if killcmd was tried and succeded
		unless ($cmd && !$cmdrtn) { 
			my $killrtn = !exists $O{dryrun} ? kill(9,$slayref->{pid}) : 1;
			if ($killrtn) {
				print_init_style('KILLED');
				alert $loglevel, $mailto, "Killed PID $slayref->{pid} ($process) because $slayref->{reason}";
			} else {
				print_init_style('FAILED');
				alert $loglevel-1, $mailto, "Failed to kill PID $slayref->{pid} ($process)";
			}
		}
	}
}

=pod

=item print_init_style()

Prints a Red Hat sysvinit style status message. Accepts an array of messages
to display in sequence.

=cut

# Print a Red Hat sysinitv style status message
sub print_init_style {
	return if $O{daemon};
	foreach my $message (@_) {
		if (length($message) <= 6) {
			print "\033[60G\[";
			if    (exists $O{dryrun})    { print "\033[1;33mDRYRUN";  }
			elsif ($message eq 'OK')     { print "\033[1;32m  OK  ";  }
			elsif ($message eq 'FAILED') { print "\033[1;31m$message"; }
			elsif ($message eq 'KILLED' || $message eq 'DRYRUN') { print "\033[1;33m$message"; }
			print "\033[0;39m\]\n";
		} else {
			print $message;
		}
	}
}

=pod

=item spawn_process()

Attempts to spawn a process. Accepts the process name, syslog log level, mail
notification to address and spawn command.

=cut

# Spawn a process
sub spawn_process {
	my ($process, $loglevel, $mailto, $cmd) = @_;

	print_init_style("Starting $process");
	my $rtn = !exists $O{dryrun} ? system("$cmd > /dev/null 2>&1") : 0;
	if ($rtn) {
		print_init_style('FAILED');
		alert $loglevel-1, $mailto, "Failed to spawn '$process' with '$cmd'";
	} else {
		print_init_style('OK');
		alert $loglevel, $mailto, "Spawned '$process' with '$cmd'";
	}
}

=pod

=item display_help()

Displays command line help.

=cut

# Command line help
sub display_help {
        my $rtn = shift;

        print <<__end__;
Syntax: $SELF [--conf=filename] [--daemon] [--cron] [--user=user]
              [--adminemail=emailaddress] [--dryrun] [--help] [--version]
   --help            Display this help
   --version         Display full version information
   --dryrun          Dryrun (do not actually kill or spawn and processes)
   --daemon          Spawn in to background daemon
   --cron            Disables 'already running' errors with the --daemon option
   --conf=str        Specify alternative config filename
   --user=str        Only scan the process table for processes running as str
   --adminemail=str  Force all notification emails to be sent to str
__end__

        exit($rtn) if defined $rtn;
}

=pod

=item read_config()

Reads in runtime configuration options.

=cut

# Read in the config
sub read_config {
	my $config_file = shift;

	# Barf and die if there's no configuration file!
	unless (-e $config_file) {
		log LOG_CRIT, "Configuration file $config_file does not exist\n";
		exit 4;
	}

	# Define default configuration values
	my %default = (
			facility		=> 'LOG_DAEMON',
			loglevel		=> 'LOG_NOTICE',
			notifyemail		=> 'root@localhost',
			#notifyemailfrom		=> sprintf("\"%s\" <%s\@%s>",$SELF,(getpwuid($EFFECTIVE_USER_ID))[0],HOSTNAME),
			notifyemailfrom		=> sprintf('%s@%s',(getpwuid($EFFECTIVE_USER_ID))[0],HOSTNAME),
			smtphost		=> 'localhost',
			smtptimeout		=> 20,
			sendmailcmd		=> '/usr/sbin/sendmail -t',
			frequency		=> 10,
			lastsafepid		=> 100,
			neverkillpid		=> 1,
			neverkillprocessname	=> 'kswapd kupdated mdrecoveryd',
			protectsafepidsquietly	=> 0,
			notifydetail		=> 'verbose',
		);

	# Read config file
	my $conf = new Config::General(
			-ConfigFile		=> $config_file,
			-LowerCaseNames		=> 1,
			-UseApacheInclude	=> 1,
			-IncludeRelative	=> 1,
			-DefaultConfig		=> \%default,
			-MergeDuplicateBlocks	=> 1,
			-AllowMultiOptions	=> 1,
			-MergeDuplicateOptions	=> 1,
			-AutoTrue		=> 1,
		);
	my %config = $conf->getall;

        # Force default values for dodgy user configuration options
        $config{frequency} = $default{frequency} unless $config{frequency} =~ /^\d+$/;
	$config{lastsafepid} = isnumeric($config{lastsafepid}) || $default{lastsafepid};

	return %config;
}





########################################
# Subroutines

=pod

=item isnumeric()

An evil bastard fudge to ensure that we're only dealing with numerics when
necessary, from the config file and Proc::ProcessTable scan.

=cut

sub isnumeric {
        local $_ = shift || '';
        if (/^\s*(\-?[\d\.]+)\s*/) { return $1; }
        return undef;
}

=pod

=item loglevel()

Accepts a syslog loglevel keyword and returns the associated constant integer.

=cut

sub loglevel {
	local $_ = shift || '';
	return $_ if /^\d+$/;
	return LOG_EMERG	if /^\s*([a-z]+_)?EMERG((A|E)NCY)?\s*$/i;
	return LOG_ALERT	if /^\s*([a-z]+_)?ALERT\s*$/i;
	return LOG_CRIT		if /^\s*([a-z]+_)?CRIT(ICAL)?\s*$/i;
	return LOG_ERR		if /^\s*([a-z]+_)?ERR(OR)?\s*$/i;
	return LOG_WARNING	if /^\s*([a-z]+_)?WARN(ING)?\s*$/i;
	#return LOG_NOTICE	if /^\s*([a-z]+_)?NOTICE\s*$/i;
	return LOG_INFO		if /^\s*([a-z]+_)?INFO(MATION(AL)?)?\s*$/i;
	return LOG_DEBUG	if /^\s*([a-z]+_)?DEBUG\s*$/i;
	return LOG_NOTICE;
}

=pod

=item logfacility()

Accepts a syslog facility keyword and returns the associated constant integer.

=cut

sub logfacility {
	local $_ = shift || '';
	return $_ if /^\d+$/;
	return LOG_KERN		if /^\s*([a-z]+_)?KERN((A|E)L)?\s*$/i;
	return LOG_USER		if /^\s*([a-z]+_)?USER\s*$/i;
	return LOG_MAIL		if /^\s*([a-z]+_)?MAIL\s*$/i;
	#return LOG_DAEMON	if /^\s*([a-z]+_)?DAEMON\s*$/i;
	return LOG_AUTH		if /^\s*([a-z]+_)?AUTH\s*$/i;
	return LOG_SYSLOG	if /^\s*([a-z]+_)?SYSLOGD?\s*$/i;
	return LOG_LPR		if /^\s*([a-z]+_)?LPR\s*$/i;
	return LOG_NEWS		if /^\s*([a-z]+_)?NEWS\s*$/i;
	return LOG_UUCP		if /^\s*([a-z]+_)?UUCP\s*$/i;
	return LOG_CRON		if /^\s*([a-z]+_)?CRON\s*$/i;
	return LOG_AUTHPRIV	if /^\s*([a-z]+_)?AUTHPRIV\s*$/i;
	return LOG_FTP		if /^\s*([a-z]+_)?FTPD?\s*$/i;
	return LOG_LOCAL0	if /^\s*([a-z]+_)?LOCAL0\s*$/i;
	return LOG_LOCAL1	if /^\s*([a-z]+_)?LOCAL1\s*$/i;
	return LOG_LOCAL2	if /^\s*([a-z]+_)?LOCAL2\s*$/i;
	return LOG_LOCAL3	if /^\s*([a-z]+_)?LOCAL3\s*$/i;
	return LOG_LOCAL4	if /^\s*([a-z]+_)?LOCAL4\s*$/i;
	return LOG_LOCAL5	if /^\s*([a-z]+_)?LOCAL5\s*$/i;
	return LOG_LOCAL6	if /^\s*([a-z]+_)?LOCAL6\s*$/i;
	return LOG_LOCAL7	if /^\s*([a-z]+_)?LOCAL7\s*$/i;
	return LOG_DAEMON;
}

=pod

=item alert()

Logs a message to syslog using log() and sends a notification email using
sendmail().

=cut

# Report something to user and syslog
sub alert {
        my ($LOG_TYPE,$mailto,$subject,@ary) = @_;
	$subject ||= 'undef alert message';
	$subject .= ' [DRYRUN]' if exists $O{dryrun};
        log $LOG_TYPE, $subject;
        sendmail($C{notifyemailfrom},$mailto,$subject,@ary) if $mailto;
}

=pod

=item log()

Logs messages to DAEMON facility in syslog. Accepts a log
level and message array. Will terminate the process if it is
asked to log a message of a log level 2 or less (LOG_EMERG,
LOG_ALERT, LOG_CRIT).

=cut

sub log {
	my ($loglevel,@msg) = @_;
	$loglevel = LOG_INFO if !defined $loglevel || $loglevel !~ /^[0-7]$/;
	@msg = '' unless @msg;
	unshift @msg,'Process exiting!' if $loglevel <= 2;
	{ # Unix::Syslog gets unhappy for it's sprintf stuff otherwise :)
		(my $syslogmsg = "@msg") =~ s/%/%%/g;
		syslog $loglevel, $syslogmsg;
	}
	#if ($loglevel <= 2) {
	#	warn "@msg\n";
	#	exit(1);
	#}
}

=pod

=item sendmail()

Sends email notifications of syslog messages, called by alert().
Accepts sending email address, recipient email address, short
message subject and an optional detailed message body array.

=cut

# Send an email
sub sendmail {
        my ($mail,$to,$subject,@ary) = @_;

	$to = $O{adminemail} if exists $O{adminemail};
        @ary = $subject unless @ary;
        $subject = sprintf("[%s/%s] %s",$SELF,HOSTNAME,$subject);
        unshift @ary,"Subject: $subject\n";
        unshift @ary,"To: \"$to\" <$to>";
        unshift @ary,"From: \"$mail\" <$mail>";



#################################################
# I may remove this code in the future. I think
# it's probably overkill, and it's not especially
# portable.

	# Append /proc file information to @ary if necessary
	my @debugfiles = $C{notifydetail} =~ /^\s*verbose\s*$/i ? qw/version loadavg meminfo/ :
		$C{notifydetail} =~ /^\s*debug\s*$/i ? qw/version loadavg meminfo swaps mounts filesystems partition smodules/ :
		$C{notifydetail} =~ /^[a-z0-9\s]+$/i ? $C{notifydetail} :
		undef;
	for my $file (@debugfiles) {
		next unless $file && -e "/proc/$file";
		if (open(PFH,"/proc/$file")) {
			push @ary, ("\n/proc/$file", '=' x length("/proc/$file"));
			while (local $_ = <PFH>) { chomp; push @ary, $_; }
			close(PFH) || log LOG_WARNING, "Unable to close file handle PFH for file '/proc/$file': $!\n";
		} else {log LOG_WARNING, "Unable to open file handle PFH for file '/proc/$file': $!\n"; }
	}
#################################################



	# Open the SMTP connection
        my $smtp = Net::SMTP->new(
                        $C{smtphost},
                        Timeout	=> $C{smtptimeout},
                        Hello	=> HOSTNAME,
                );

	#  If the SMTP connection was established then send the email
	if ($smtp) { 
	        $smtp->mail($mail);
	        $smtp->to($to);
	        $smtp->data(join("\n",@ary));
	        $smtp->dataend();

	# Otherwise try using the local sendmail binary
	} else { 
		log LOG_WARNING, "Unable to establish SMTP connection with $C{smtphost}; attempting sendmail pipe instead";
		if (open(PH,"|$C{sendmailcmd}")) {
			print PH $_ for @ary;
			close(PH) || log LOG_WARNING, "Unable to close pipe handle PH for command '|$C{sendmailcmd}': $!";
		} else {
			log LOG_WARNING, "Unable to open pipe handle PH for command '|$C{sendmailcmd}': $!";
		}
	}
}

=pod

=item daemonize()

Launches the process in to the background. Checks to see if there is already an
instance running.

=cut

# Daemonize self
sub daemonize {
	my $pidfile = shift;
        # Check that we're not already running, and quit if we are
        if (-f $pidfile) {
                unless (open(PID,$pidfile)) {
			log LOG_CRIT, "Unable to open file handle PID for file '$pidfile': $!\n";
			exit 5;
		}
                my $pid = <PID>;
                close(PID) || log LOG_WARNING, "Unable to close file handle PID for file '$pidfile': $!\n";
		#print `ls -al /proc/$pid/stat` if DEBUG;
                if (-f "/proc/$pid/stat") {
                        open(FH,"/proc/$pid/stat") || log LOG_WARNING, "Unable to open file handle FH for file '/proc/$pid/stat': $!\n";
                        my $line = <FH>;
                        close(FH) || log LOG_WARNING, "Unable to close file handle FH for file '/proc/$pid/stat': $!\n";
                        if ($line =~ /\d+[^(]*\((.*)\)\s*/) {
                        #if ($line =~ /\d+\s+\((.+?)\)\s+/) {
                                my $process = $1;
                                if ($process =~ /^$SELF$/) {
                                        log LOG_NOTICE, "$SELF already running at PID $pid; exiting.\n" unless exists $O{cron};
                                        closelog;
                                        exit 0;
                                }
                        }
                } else {
                        log LOG_NOTICE, "Removing stale PID file.\n";
                        unlink($pidfile);
                }
        }

        # Daemon parent about to spawn
        if (my $pid = fork) {
                log LOG_NOTICE, "Forking background daemon, process $pid.\n";
                closelog;
                exit 0;

        # Child daemon process that was spawned
        } else {
                # Fork a second time to get rid of any attached terminals
                if (my $pid = fork) {
                        log LOG_NOTICE, "Forking second background daemon, process $pid.\n";
                        closelog;
                        exit 0;
                } else {
                        unless (defined $pid) {
				log LOG_CRIT, "Cannot fork: $!\n";
				exit 6;
			}
                        close(STDOUT); close(STDERR); chdir '/';
                        unless (open(FH,">$pidfile")) {
				log LOG_CRIT, "Unable to open file handle FH for file '$pidfile': $!\n";
				exit 7;
			}
                        print FH $$;
                        close(FH) || log LOG_WARNING, "Unable to close file handle FH for file '$pidfile': $!\n";
                }
        }
}

=pod

=item display_version()

Displays complete version, author and license information.

=back

=cut

# Display version information
sub display_version {
        my $rtn = shift;
	if ('$Revision: 1.8 $' =~ /Revision: (\S+)/) {
		print "$SELF $1\n";
	}
        print "$VERSION\n";
	print "Written by Nicola Worthington, <nicolaworthington\@msn.com>.\n\n";
	print "Copyright (C) 2002,2003,2004 Nicola Worthington.\n\n";
	print <<EOL;
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
EOL
        exit($rtn) if defined $rtn;
}




 
=pod

=head1 BUGS

Hopefully none. ;-) Send any bug reports to me at nicolaworthington@msn.com
along with any patches and details of how to replicate the problem.
Please only send reports for bugs which can be replicated in the
I<latest> version of the software. The latest version can always be
found at http://www.nicolaworthington.com

=head1 TODO

The following functionality will be added soon:

=over 4

=item Code cleanup

The code needs to be cleaned up and made more efficient.

=item killperprocessname directive

Will accept a boolean value. If true, only 1 process per process scope
will ever be killed, instead of all process IDs matching kill rules.
This should be used in conjunction with the new killcmd directive. For
example, you may define that a database daemon may never take up more
than 90% CPU time, and it runs many children processes. If it exceeds
90% CPU time, you want to issue ONE restart command in order to stop and
then start all the database processes in one go.

=item time period limited rules

Functionality to limit validity of process scopes to only be checked
between defined time periods. For example, only check that httpd is running
between the hours of 8am and 5pm on Mondays and Tuesdays.

=back

=head1 SEE ALSO

nsmon

=head1 LICENSE

Written by Nicola Worthington, <nicolaworthington@msn.com>.
Copyright (C) 2002,2003,2004 Nicola Worthington.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

=head1 AUTHOR

Nicola Worthington <nicolaworthington@msn.com>

http://www.nicolaworthington.com

=cut

=pod OSNAMES

Unix

=pod SCRIPT CATEGORIES

UNIX/System_administration

=cut

__END__

#define KERN_EMERG    "<0>"  /* system is unusable               */
#define KERN_ALERT    "<1>"  /* action must be taken immediately */
#define KERN_CRIT     "<2>"  /* critical conditions              */
#define KERN_ERR      "<3>"  /* error conditions                 */
#define KERN_WARNING  "<4>"  /* warning conditions               */
#define KERN_NOTICE   "<5>"  /* normal but significant condition */
#define KERN_INFO     "<6>"  /* informational                    */
#define KERN_DEBUG    "<7>"  /* debug-level messages             */



