#!/usr/bin/perl

# Emit the CGI response header first.  The blank line after it ends the
# header section; "text/html" tells the browser to render what follows as
# HTML instead of showing it as plain text.
print "Content-Type: text/html\n\n";

# Script-wide state
$debug      = 0;    # 1 = include the log file path in the "cannot open" message
$linenumber = 0;    # running count of the log entries printed
$error      = 0;    # becomes 1 when the log file cannot be opened

# Version history
# 1.0.0  Original program written
# 1.1.0  Changed the output to be HTML 4.01 compliant
# 2.0.0  Changed the IP addresses to 4 sets of 3 digits for ease of readability
# 2.1.0  Added the ability to run a whois check on an IP address with a single click
# 2.1.1  Added comments and a version number for addition to the CPAN archives
# 2.1.2  Added better error handling
# 2.2.0  Changed the local net address selection and added both local net selections
#
$version = "GetLog version 2.2.0";

# Location of an Apache log file whose lines have the following format:
#
# 10.0.1.1 - - [15/Jan/2005:01:09:18 -0500] "GET /cgi-bin/getagentlog.cgi HTTP/1.1" 200 59734
#
$mylogfilename = "/private/var/log/httpd/access_log";

# Text used for both the <title> and the top-level heading of the page
$mytitle = "Access log";

# Message shown when the log file cannot be opened.  The debug variant names
# the file; the normal variant closes and re-opens the <small> element so
# the heading is rendered outside of it.
# NOTE(review): the debug variant does not step out of <small> the way the
# normal variant does -- confirm whether that difference is intended.
$errormessage = ($debug == 1)
  ? "<h3>Cannot open the access logs.<br />\n" . $mylogfilename . "</h3>\n\n"
  : "</small>\n\n<h3>Cannot open the log file.</h3>\n\n<small>\n";

# Print the top of the page in one go: doctype, <head>, the opening of the
# layout table, the title and version headings, the column legend, the list
# of result codes, and finally the <small> element that wraps the log
# entries.  $mytitle and $version are interpolated into the text.
print <<"END_HEADER";
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
  <title> $mytitle </title>
  <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
  <link type="text/css" rel="stylesheet" href="/css/chapterstyle.css">
</head>
<body>
<table class="noframes">
<tr>
<td>
<h1>$mytitle</h1>
<h4>$version</h4>
<hr />
<p class="noindent">line number - IP address - date/time - method - file - protocol - result code - bytes served</p>
<ul> Result codes
  <li>200 - file found and served</li>
  <li>302 - file moved</li>
  <li>304 - file has not changed</li>
  <li>404 - file not found</li>
  <li>500 - file access error, somebody tried to access a file outside the web server folder.</li>
</ul>
<hr />

<small>
END_HEADER

# Open the log file and print one HTML paragraph per remote request.
#
# Everything copied from the log is HTML-escaped before it is printed: the
# request line is chosen by whoever sent the request, so printing it
# verbatim would let a crafted URL inject markup into this page.

# html_escape($text)
#   Returns $text with the characters & < > " replaced by HTML entities.
sub html_escape {
  my ($text) = @_;
  $text =~ s/&/&amp;/g;
  $text =~ s/</&lt;/g;
  $text =~ s/>/&gt;/g;
  $text =~ s/"/&quot;/g;
  return $text;
}

# url_encode($text)
#   Returns $text with every character other than letters, digits and
#   . _ ~ - replaced by its %XX escape, for use in a URL query string.
sub url_encode {
  my ($text) = @_;
  $text =~ s/([^A-Za-z0-9._~-])/sprintf('%%%02X', ord($1))/ge;
  return $text;
}

# is_overload_request($line)
#   Returns 1 for the SEARCH and CONNECT requests that Apache cannot filter
#   out of the log itself, 0 otherwise.
sub is_overload_request {
  my ($line) = @_;
  return ($line =~ m/"(?:SEARCH|CONNECT).*"/) ? 1 : 0;
}

# is_local_address($line)
#   Returns 1 when the line starts with a local net address (10.0.1.x or
#   192.168.1.x), 0 otherwise.  The dots are matched literally and the third
#   octet must end at a dot, so 10.0.100.x or 192.168.12.x are not treated
#   as local.
sub is_local_address {
  my ($line) = @_;
  return ($line =~ m/^(?:10\.0\.1|192\.168\.1)\./) ? 1 : 0;
}

# split_log_line($line)
#   Splits a log line into (client, rest).  A dotted-quad client address is
#   returned with every octet zero-padded to 3 digits for easier reading;
#   any other client token (host name, IPv6 address) is returned unchanged.
#   "rest" is the text after the single separator that follows the client,
#   without the trailing newline.  Returns an empty list for a blank line.
sub split_log_line {
  my ($line) = @_;

  if (my ($one, $two, $three, $four, $rest) =
        $line =~ m/^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})(?:\s(.*))?$/) {
    $rest = '' unless defined $rest;
    return (sprintf('%03d.%03d.%03d.%03d', $one, $two, $three, $four), $rest);
  }

  if (my ($client, $rest) = $line =~ m/^(\S+)(?:\s(.*))?$/) {
    $rest = '' unless defined $rest;
    return ($client, $rest);
  }

  return;
}

# format_log_entry($number, $client, $rest)
#   Returns the HTML paragraph for one log entry: the entry number, the
#   client as a link to the whois query for it, and the rest of the line.
#   NOTE(review): the zero-padded address is what gets sent to the whois
#   server, as before; some address parsers read leading zeros as octal --
#   confirm the whois service accepts this form.
sub format_log_entry {
  my ($number, $client, $rest) = @_;
  my $whois = "http://ws.arin.net/cgi-bin/whois.pl?queryinput=" . url_encode($client);
  return join("",
    "<p class=\"noindent\">", $number,
    ": <a class=\"noindent\" href=\"", html_escape($whois), "\">",
    html_escape($client), "</a> ", html_escape($rest), "</p>\n");
}

if (open(my $log_fh, '<', $mylogfilename)) {
  # Read one line at a time so a large log is never held in memory whole.
  while (my $line = <$log_fh>) {
    chomp $line;

    # get rid of the overload logs that Apache cannot filter out
    next if is_overload_request($line);

    # filter out the local net addresses (10.0.1.x and 192.168.1.x)
    next if is_local_address($line);

    my ($client, $rest) = split_log_line($line);
    next unless defined $client;    # blank line: nothing to show

    $linenumber++;
    print format_log_entry($linenumber, $client, $rest);
  }
  close($log_fh);

  if ($linenumber == 0) {
    print "<p class=\"noindent\">No log entries at this time.  The log has just been freshly cleaned.</p>\n";
  }
}
else {
  $error = 1;
  print $errormessage;
}

# Print the bottom of the page: close the <small> element that wraps the log
# entries, add the closing heading between two rules, close the layout table
# and the document, then finish with a success exit status.
print <<'END_FOOTER';
</small>

<hr />
<h1>The End</h1>
<hr />
</td>
</tr>
</table>
</body>
</html>

END_FOOTER

exit 0;



