#! /usr/bin/perl
    eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
	if 0;  # not running under some shell

use 5.8.0;
use IO::File;
use GDBM_File;
use Getopt::Long;
use File::Basename;
use File::Find;
use File::Path;
use File::Slurp;
use HTML::Stream;
use Pod::Tree::HTML;
use strict;
use warnings;

my $me = $0; $me =~ s'.*/'';
my $VERSION = "0.1";

# Variables that may be set from the command line
my  $bg         = "white";
my  $click      = "red";
my  $font       = "1";
my  $link       = "blue";
my  $page_cols  = 3;
our $targetdir  = "/usr/local/doc/HTML/Perl";
my  $title      = "Perl Documentation";
my  $visit      = $link;

my  $dumpdb;
my  $help;
my  $sourcedir;
our $verbose;

# Tied hashes that track the state of the index
# Key is path to original (.pod or .pm) file.
# Value in %index is the path to the .html file
# Value in %nopod is the last-modified time of a .pm with no POD
our (%index, %nopod);

my @prefixes = sort { length $b <=> length $a } @INC;
   pop(@prefixes) if $prefixes[-1] eq ".";

# For the source directory, take the shortest path in @INC and 
# eliminate the last directory component.  With luck, this will
# give you /usr/lib/perl5 for a standard installation.
# Or set it on the commandline
$sourcedir = $prefixes[-1]; $sourcedir =~ s'/[^/]+$'';

# To debug in a less threatening environment, use these
#our $targetdir = "/home/geoff/tmp/html";
#my $sourcedir  = "home/geoff/tmp/pt";

# Now, replace the source directory with the target so that the 
# structure of the html files will match the distribution
@prefixes = map { s:$sourcedir:$targetdir:; $_ } @prefixes;

# And make a nice compiled regex for later
my $prefixes = "^" . join("/|^", @prefixes) . "/";
my $prefixre = qr/$prefixes/;

# Name and location of the HTML index file
my  $index      = "$targetdir/index.html";

sub Usage {
    print <<_USAGE_;
    $me $VERSION
		  [-bg color]	 Background (default $bg).
		  [-click color] Color for the click response (default $click).
		  [-cols n]	 Number of colums on the index page (default $page_cols).
    		  [-dumpdb]	 Dump .times and .nopod, then exit.
		  [-font num]	 Font size to use for the index.
		  [-help]	 Print this and quit.
		  [-link color]	 Color for the links (default $link).
    		  [-source dir]  Defaults to $sourcedir.
		  [-target dir]	 Defaults to $targetdir.
		  [-title str]	 Defaults to "Perl Documentation"
		  [-verbose]	 Babble.
		  [-visit color] Color for a link visited (default $visit).

    $me $VERSION maintains two tied hashes, .index and .nopod in
    $targetdir.  This allows it to avoid re-creating the .html
    when the .html file is newer than the .pod or .pm.  Consequently,
    its inexpensive to just re-run $me after installing new modules. 
    When .pod or .pm files are removed, this is noticed and the
    .index entry is removed as well.

_USAGE_
    exit(0);
}

sub list_pods {

    # This a "wanted" sub for find. 
    # It finds files with the extension .pod or the extension .pm which have
    # a line that begins with "=" and saves them in the index hash.

    return unless /\.(pm|pod)$/;

    my $key = $File::Find::name;

    # If the file has been indexed, we know that it has the right stuff
    # What we mean by "the right stuff" here is that we've already looked
    # at the .pm and determined that it meets the '^=' criteria.  The
    # .pod is a no-brainter at this point.
    if (exists $index{$key}) {
	print "$key already indexed \n" if $verbose;
	return;
    }

    # .pod files should have POD; with .pm's its problematical
    # So, check the .pm file to see if there are any lines beginning with "="

    if (/\.pm$/) {
	# Do we need to do this?
	if (exists $nopod{$key}) {
	    my $time = $nopod{$key};
	    if ($time and $time > (stat($key))[9]) {
		print "No need to check $key for POD\n" if $verbose;
		return;
	    }
	}
	# Get the source and check
	print "Checking $key for POD\n" if $verbose;
	my $file = read_file($File::Find::name);

	# This is not a foolproof check.  But its cheap.
	if ($file !~ /^=/m ) {
	    print "No POD in $_\n" if $verbose;
	    $nopod{$key} = (stat($key))[9];
	    return;
	}
    }

    # Strip the extension (Remember: $_ is the current file name)
    my ($file) = /(.*).(pm|pod)$/;
    my $dir    = $File::Find::dir;
       $dir    =~ s/$sourcedir/$targetdir/;

    # Initialize the tied hash
    print "$key indexed\n" if $verbose;
    $index{$key} = "$dir/$file.html 0";
}

sub do_index {

    my $fh = new IO::File(">$index") or die "$me: Can't open $index: $!\n";
    my $stream = new HTML::Stream($fh);

    # Page title (browser title bar)
    $stream -> HTML;
    $stream -> HEAD->TITLE->text($title)->_TITLE->_HEAD;

    # fonts and colors
    $stream -> BODY(BGCOLOR=>$bg, LINK=>$link, ALINK=>$click, VLINK=>$visit);

    # Page heading
    $stream -> CENTER->H1->text($title)->_H1->_CENTER;

    # Make the page content a table of $page_cols columns
    $stream -> TABLE;
    $stream -> TR;

    my %tags;
    for (values %index) {
	my ($url) = /^$targetdir\/?(\S+)/;
	my ($tag) = /$prefixre(.*)\.html/;
	$tag =~ s'/'::'g;
	$tag =~ s'^pod::''; # Hack for the pod:: docs
	$tags{$tag} = $url;
    }

    my $n = 0;
    foreach (sort { uc $a cmp uc $b } keys %tags) {
	$stream -> TD->FONT(SIZE=>$font) if defined $font;
	$stream -> A(HREF => $tags{$_})->text($_)->_A->_FONT->_TD->text("\n");
	$stream -> _TR->TR if (++$n % $page_cols) == 0;
    }

    $stream -> _TR;
    $stream -> _TABLE;
    $stream -> _FONT if defined $font;
    $stream -> _BODY;
    $stream -> _HTML;

    $fh->close();
}

# Here's where it all begins
Usage() unless GetOptions("bg:s"	=> \$bg,
			  "click:s"	=> \$click,
			  "cols:s"	=> \$page_cols,
			  "dumpdb"	=> \$dumpdb,
			  "font:s"	=> \$font,
			  "help"	=> \$help,
			  "link:s"	=> \$link,
			  "source:s"	=> \$sourcedir,
			  "target:s"	=> \$targetdir,
			  "title:s"	=> \$title,
			  "visit:s"	=> \$visit,
			  "verbose"	=> \$verbose,
			 );
Usage() if defined $help;
Usage() if @ARGV;

$SIG{INT} = sub {
    untie %index;
    untie %nopod;
    exit(1);
};

tie %index, "GDBM_File", "$targetdir/.index", &GDBM_WRCREAT, 0640
    or die "Unable to open $targetdir/.index - $!\n";

tie %nopod, "GDBM_File", "$targetdir/.nopod", &GDBM_WRCREAT, 0640
    or die "Unable to open $targetdir/.nopod - $!\n";

if (defined $dumpdb) {
    print "$targetdir/.times\n";
    for (sort keys %index) {
	my ($h, $t) = split(/ /, $index{$_});
	print "  $_\n    $h\n    ", scalar(localtime($t)), "\n";
    }
    print "\n$targetdir/.nopod\n";
    for (sort keys %nopod) {
	my $t = $nopod{$_};
	print "  $_\n    ", scalar(localtime($t)), "\n";
    }

    exit(0);
}

# Generate a hash (in %index) of qualifying .pm and .pod files
find(\&list_pods, $sourcedir);

my $link_map = new LinkMap;

for (sort keys %index) {

    # Eliminate files that were, but are no more.
    unless (-e $_) {
	print "Removing index entry for $_\n";
	delete $index{$_};
	next;
    }

    my ($file, $time) = split(/ /, $index{$_});

    # Removing the .html is a way to force a rebuild. 
    $time = 0 unless -e $file;

    # See if there's anything to do.  stat[9] is last modified time
    next if $time && $time > (stat($_))[9];

    # Make an html file from the POD
    my $dir  = dirname($file);
    mkpath($dir) unless -d $dir;

    my ($base) = $file =~ /^$targetdir\/?(.*).html/;

    print "$_ =>\n    $file\n" if defined $verbose;

    # Make the .html from the POD in the source file
    my $html = new Pod::Tree::HTML($_, $file, link_map => $link_map,
					      base     => $base);
    $html->translate();

    # Record the creation time of the .html file 
    $index{$_} = "$file " . (stat($file))[10];
}

# Make an index of the html files   
do_index();

# Clean up
untie %index;
untie %nopod;

package LinkMap;

use strict;
use File::Basename;
use vars qw(@ISA @EXPORT);
require Exporter;
@ISA = qw(Exporter);

sub new {
    my $this = shift;
    my $class = ref($this) || $this;
    my $self = {};
    bless $self, $class;
    return $self;
}

sub map {
    my ($self, $base, $page, $section, $depth) = @_;

    return ($base, $page, $section) unless length $page;

    unless (defined $self->{LOC}) {
	$self->{LOC} = {};
	# Invert the html list
	my @html = values %main::index;
	for (@html) {
	    s/ .*//;	# Remove time
	    my ($name, $path, $suffix) = fileparse($_, ".html");
	    if (exists $self->{LOC}{$name}) {$self->{LOC}{$name} = [$path]}
	    else 			   {push(@{$self->{LOC}{$name}}, $path)}
	}
    }

    my $tag = $page;  $tag =~ s/.*://;
    unless (exists $self->{LOC}{$tag}) {
	print "No file for $tag.html, ref by $base.html\n";
    }
    else {
	my $refs = $self->{LOC}{$tag};
	print "Multiple sources for $tag.html, using first\n    ",
	      join("\n    ", @$refs), "\n"
	    if @$refs > 1; # and $main::verbose;
	$base = $refs->[0];
    }
    return ($base, $tag, $section);
}
1;

=head1 NAME

pod2indexed_html

=head1 SYNOPSIS

pod2indexed_html [-bg color] [-click color] [-cols n] [-dumpdb]
  [-font num] [-help] [-link color] [-source dir] [-target dir] 
  [-title str] [-verbose] [-visit color]

=head1 DESCRIPTION

PodITondexedHtml locates all the POD in your distribution, converts it to 
HTML and makes an index page. Links are rendered so as to refer to the 
appropriate newly-generated pages.

The principal advantage of pod2indexed_html is that it uses a persistent
database of module creation times
so that once its been run for the first time, subsequent
executions are relatively quick, depending of course on what has been changed.  pod2indexed_html notices both new modules and updates.

The HTML index is flat, organized to look like a module hierarchy. For example,
DBI::Const::GetInfo::ANSI.  One quirk: the perldoc entries omit "pod".

=head1 OPTIONS

      -bg color		Background (default white).
      -click color 	Color for the click response (default red).
      -cols n	 	Number of colums on the index page (default 3).
      -dumpdb	 	Dump .times and .nopod, then exit.
      -font num		Font size to use for the index.
      -help	 	Print this and quit.
      -link color	Color for the links (default blue).
      -source dir  	Defaults to @IND, excluding '.'.
      -target dir	Defaults to /usr/local/doc/HTML/Perl.
      -title str	Defaults to "Perl Documentation"
      -verbose		Babble.
      -visit color 	Color for a link visited (default blue).

=head1 ENVIRONMENT

No environment variables are used.

=head1 PREREQUISITES

This script requires the following modules:
C<IO::File>, C<GDBM_File>, C<Getopt::Long>, C<File::Basename>, C<File::Find>,
C<File::Path>, C<File::Slurp>, C<HTML::Stream>, C<Pod::Tree::HTML>.

It also requires Perl 5.8.0, but should run under earlier version with only
minor modifications.  Required modules willing, of course.

=head1 DIAGNOSTICS

No file for <something>.html, ref by <some module>

    A link has been discovered that can't be satisfied.
    Won't work when clicked upon, but otherwise harmless.

Multiple sources for <something>.html, using first

    The file <something>.html is referenced in different places. 
    Confusion reigns.

Removing index entry for <some module>

    Just a heads-up that some module has been removed.

=head1 AUTHOR

Geoffrey Leach C<geoff@cdepot.net>

=head1 VERSION

0.1

=head1 COPYRIGHT AND LICENSE

Copyright 2003 by Geoffrey Leach

This script is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

=pod SCRIPT CATEGORIES

CPAN/Administrative

=cut

