#!perl5 -w
use strict qw(refs subs);

# version 1.10
# by Marko.Macek@snet.fri.uni-lj.si, mark@hermes.si
#
# TODO:
#   eliminate blank panes that link to next pane when =item XX\n\n=itemYY used
#   rewrite ?<> parsing
#   better index
#   cleaner xref heuristics
#   process embeded pods
#   IPFC doesn't seem to handle tabs - are 
#   handle perl/SYNOPSIS properly (tabs, indented lines) -- or is it a bug in doc
#     probably should process as pre but with markup -- done below (ok?)
#   remove =head1 NAME and use it as toplevel heading
#     (also collapse DESCRIPTION if the only section).
#   pod2ipf needs to be split into index generator and translator
#     this should enable separate translation of each .pod
#     and use of .INF concatenation to view the full docs together
#     (with linking if index was used).
#     IPF requires numerical references when concatenation is used, not symbolic :-(
#   improved handling of windows
#   ...

$DocTitle = 'Perl Manual';

@files =
    ( # from perl.pod
     #  file         section name
     [ 'perl',      'Perl overview' ],
     #[ 'perltoc',   'Perl documentation table of contents' ],
     [ 'perldata',  'Perl data structures' ],
     [ 'perlsyn',   'Perl syntax' ],
     [ 'perlop',    'Perl operators and precedence' ],
     [ 'perlre',    'Perl regular expressions' ],
     [ 'perlrun',   'Perl execution and options' ],
     [ 'perlfunc',  'Perl builtin functions' ],
     [ 'perlvar',   'Perl predefined variables' ],
     [ 'perlsub',   'Perl subroutines' ],
     [ 'perlmod',   'Perl modules' ],
     [ 'perlref',   'Perl references' ],
     [ 'perldsc',   'Perl data structures intro' ],
     [ 'perllol',   'Perl data structures: lists of lists' ],
     [ 'perlobj',   'Perl objects' ],
     [ 'perltie',   'Perl objects hidden behind simple variables' ],
     [ 'perlbot',   'Perl OO tricks and examples' ],
     [ 'perldebug', 'Perl debugging' ],
     [ 'perldiag',  'Perl diagnostic messages' ],
     [ 'perlform',  'Perl formats' ],
     [ 'perlipc',   'Perl interprocess communication' ],
     [ 'perlsec',   'Perl security' ],
     [ 'perltrap',  'Perl traps for the unwary' ],
     [ 'perlstyle', 'Perl style guide' ],
     [ 'perlxs',    'Perl XS application programming interface' ],
     [ 'perlxstut', 'Perl XS tutorial' ],
     [ 'perlguts',  'Perl internal functions for those doing extensions ' ],
     [ 'perlcall',  'Perl calling conventions from C' ],
     [ 'perlembed', 'Perl how to embed perl in your C or C++ app' ],
     [ 'perlpod',   'Perl plain old documentation' ],
     [ 'perlbook',  'Perl book information' ],
    );

# in these sections an =item will be treated as an =head[n]
# this is necessary because .IPF/.INF format/compiler have
# some limitations for pane size and linking. :-(

@split_sections =
    (
     'perlfunc/DESCRIPTION/Alphabetical Listing of Perl Functions',
     'perldiag/DESCRIPTION',
     'perlvar/DESCRIPTION/Predefined Names',
    );

$font = ''; #':font facename=Helv size=16x8.';

$debug_xref = 0;
$dump_xref = 0;
$dump_contents = 0;
$ref_delta = 1;     # start from 1
$maxtoc = 5;
$dots = 0;
$multi_win = 1;     # 1 = use alternate window for toc

sub out;
sub contents;
sub escape;
sub addref;
sub findref;
sub winhead;
sub winlink;
sub no_markup_len;

$/ = "";

foreach $sc (@split_sections) { $as_head{$sc} = 1; }

print ":userdoc.\n";
print ":title." . $DocTitle . "\n";

for ($pass = 1; $pass <= 2; $pass++) {

    $headno = 0; # make refs hash for this on first pass

    print STDERR "pass: $pass\n";
    for ($fn = 0; $fn <= $#files; $fn++) {
        $fname = $files[$fn][0] . '.pod';
        $page = $files[$fn][0];
        $toc = $maxtoc;
        
        open(IN, $fname) || die "open $fname: $!";
        print STDERR $fname . ": ";
        print STDERR "\n" if !$dots;

        $section = $files[$fn][0] . ' - ' . $files[$fn][1];
        if ($pass == 1) {
            addsection($section, $headno, 1);
            addref($page, $headno);
        }
        $section_head[1] = $page;
        $path = $section_head[1];
        if ($pass == 2) {
            print ":h1 toc=$toc " . winhead($headno)
                . " id=" . ($headno + $ref_delta) . "."
                . out($section, 0) . "\n" . $font;
            print ":i1." . out($section, 0) . "\n";
        }
        $headno++;
        
        @lstack = ();
        $emptypane = 1;
        
        PARA: while ($line = <IN>) {
            chomp $line;
            if ($line =~ /^=head(\d+)\b\s*/) {
                $nopara = 0;
                $heading = $';
                $hl = $1 + 1;
                contents($hl, $headno) if $emptypane;
                if ($pass == 1) {
                    addsection($heading, $headno, $hl);
                    addref(qq|$page/"$heading"|, $headno);
                }
                $section_head[$hl] = $heading;
                $path = join('/', @section_head[1..$hl]);
                if ($pass == 2) {
                    print ":h$hl " . winhead($headno)
                        . " id=" . ($headno + $ref_delta) . "."
                        . out($heading, 0) . "\n" . $font;
                    print ":i1." . out($heading, 0) . "\n";
                }
                $headno++;
                print STDERR "." if $dots;
                $emptypane = 1;
            } elsif ($line =~ /^=over\b/) {
                # look ahead, to see how the list should look like
                chomp($line = <IN>);
                if ($line =~ /^\=item\s+\*/) { # item *
                    push(@lstack, "ul");
                    print ":ul.\n" if $pass == 2;
                } elsif ($line =~ /^\=item\s+1\.?/) {  # item 1. 
                    push(@lstack, "ol");
                    print ":ol.\n" if $pass == 2;
                } elsif (defined($as_head{$path})) {
                    # in some cases we use headings instead of lists
                    warn "toc for $page, id=$headno too low" if ! $toc >= $hl + 1;
                    push(@lstack, "head");
                    $hl++;
                    $eitems = "";
                } else {
                    push(@lstack, "ul");
                    print ":ul.\n" if $pass == 2;
                }
                $nopara = 0;
                redo PARA;
            } elsif ($line =~ /^=back\b/) {
                if ($#lstack >= 0) {
                    $t = pop(@lstack);
                    if ($t eq 'ul') {
                        print ":eul.\n" if $pass == 2;
                    } elsif ($t eq 'ol') {
                        print ":eol.\n" if $pass == 2;
                    } elsif ($t eq 'head') {
                        $hl--;
                    }
                } else {
                    warn "stack empty on page=$page, id=$headno";
                    $hl--;
                }
                $nopara = 0;
            } elsif ($line =~ /^=item\b\s*/) {
                $nopara = 0;
                $heading = $';
                $headx = $heading;
                print STDERR "." if $dots;
                if (($#lstack == -1) || ($lstack[$#lstack] eq 'head')) {
                    contents($hl, $headno) if $emptypane;

                    # lowest level never empty, IPFC uses next page
                    # by default (but Back button doesn't work :-()
                    $emptypane = 0;

                    if ($pass == 1) {
                        addsection($heading, $headno, $hl);
                        addref(qq|$page/"$headx"|, $headno);
                        $headx =~ /(\w+)/;
                        $word = $1;
                        addref(qq|$page/"$word"|, $headno) if defined $word;
                        $headx =~ /(\S+)/;
                        $word = $1;
                        addref(qq|$page/"$word"|, $headno) if defined $word;
                    }
                    $section_head[$hl] = $heading;
                    $path = join('/', @section_head[1..$hl]);
                    if ($pass == 2) {
                        print ":h$hl " . winhead($headno)
                            . " id=" . ($headno + $ref_delta) . "."
                            . out($heading, 0) . "\n" . $font;
                        print ":i1." . out($heading, 0) . "\n";
                    }
                    $headno++;

                    # look ahead to see if this =item is empty.
                    # if it is, create a list of empty pages of
                    # on first non-empty.
                    chomp($line = <IN>);
                    if ($pass == 2) {
                        if ($line =~ /^=item\b/) {
                            $eitems .= $heading . "\n";
                        } elsif ($eitems ne "") {
                            $eitems .= $heading . "\n";
                            foreach $l (split("\n", $eitems)) {
                                print ":p.:hp2." . out($l, 1) . ":ehp2.";
                            }
                            $eitems = "";
                        }
                    }
                    redo PARA;
                } else {
                    $emptypane = 0;
                    if ($lstack[$#lstack] eq 'ul' && $heading =~ /^\s*\*\s*(.*)$/ or
                        $lstack[$#lstack] eq 'ol' && $heading =~ /^\s*\d+\.?\s*(.*)$/)
                    {
                        if ($pass == 2) {
                            print ":li.";
                            $heading = $1;
                            if ($1 ne "") {
                                
                                print out($heading, 1) . "\n";
                                print ":i1." . out($heading, 0) . "\n";
                            } else {
                                $nopara = 1;
                            }
                        }
                    } else {
                        if ($pass == 2) {
                            print ":li." . out($heading, 1) . "\n";
                            print ":i1." . out($heading, 0) . "\n" if $pass == 2;
                        }
                    }
                }
            } elsif ($line =~ /^=\w+/) {
                warn "what to do with '$line'?\n";
            } elsif ($line =~ /^\s+\S/) {
                if ($pass == 2) {
                    @tlines = split(/\n/, $line);
                    foreach $tline (@tlines) {
                        1 while $tline =~ s/\t+/' 'x (length($&) * 8 - length($`) % 8)/e;
                    }
                    $pre = join("\n", @tlines);
                    print "\n:xmp.\n" . escape($pre) . ":exmp.";
                }
                $nopara = 0;
                $emptypane = 0;
            } elsif ($line =~ /^\s+\S/m) { # see perl(run)?/SYNOPSIS for this
                if ($pass == 2) {
                    $mark = out($line, 1);

                    # hack hack ;-)
                    # IPFC doesn't handle tabs
                    # no_markup_len tries to guess the # of ' ' to next tab,
                    # but even when the guess is correct, things don't seem
                    # to align when bold,.. is used :-(
                    @tlines = split(/\n/, $mark);
                    foreach $tline (@tlines) {
                        1 while $tline =~ s/\t+/' 'x (length($&) * 8 - &no_markup_len($`) % 8)/e;
                    }
                    $pre = join("\n", @tlines);
                    
                    print "\n:xmp.\n" . $pre . ":exmp.";
                }
                $nopara = 0;
                $emptypane = 0;
            } else {
                if ($pass == 2) {
                    print ":p." if !$nopara;
                    print out($line, 1);
                } else {
                    if ($line =~ /^\s+$/) {
                        warn "line with blanks in $page, id=$headno\n";
                    }
                }
                $nopara = 0;
                $emptypane = 0;
            }
        }
        close(IN);
        print STDERR "\n" if $dots;
    }
}
print "\n:euserdoc.\n";

if ($dump_xref) {
    foreach (keys %links) {
        print STDERR $_ . "->" . $links{$_} . "\n";
    }
}
if ($dump_contents) {
    for($i = 0; $i <= $#head; $i++) {
        print STDERR "    " x $headlevel[$i], $head[$i], "\n";
    }
}

sub out {
    my $para = $_[0];
    my $markup = $_[1];
    my @stack = ();
    my $output = "";

    return if ($pass == 1);
    
    $cpos = 0;
    $opos = 0;
    TAG: while ($para =~ m{([<>])}g) { # ;-) ;-)
        $cpos = pos $para;
        $c = $1;
        
        if ($c eq '<' && $cpos >= 0) {
            $output .= escape(substr($para, $opos, $cpos - $opos - 2));
            
            $c = substr($para, $cpos - 2, 1);
            if ($c !~ /[A-Z]/) {
                $output .= escape($c . '<');
                pos($para) = $opos = $cpos;
                next TAG;
            }
            if ($c eq 'B') {
                $output .= ':hp2.' if $markup;
                push (@stack, $c);
            } elsif ($c eq 'F') {
                $output .= ':hp2.' if $markup;
                push (@stack, $c);
            } elsif ($c eq 'S') {
                $output .= ':hp2.' if $markup;
                push (@stack, $c);
            } elsif ($c eq 'I') {
                $output .= ':hp1.' if $markup;
                push (@stack, $c);
            } elsif ($c eq 'C') {
                $output .= ':hp2.' if $markup;
                push (@stack, $c);
            } elsif ($c eq 'L') {
                my $link;
	        #push (@stack, $c);
                # link
                pos $para = $cpos;
                if ($para =~ m/\G([^>]+)\>/g) {
                    $cpos = pos $para;
                    $link = $1;
                    $foundlink = findref($link); 
                    if (defined $links{$foundlink}) {
                        $output .= ":link reftype=hd refid=" .
                            ($links{$foundlink} + $ref_delta) . '.'
                            if $markup;
                        $output .= escape($link);
                        $output .= ":elink." if $markup;
                    } else {
                        warn "   unresolved link: $link\n";
                        $output .= escape($link);
                    }
                }
            } elsif ($c eq 'E') {
                pos ($para) = $cpos;
                if ($para =~ m/\G([A-Za-z]+)>/g) {
                    my $esc;
                    $cpos = pos $para;

                    $esc = exists $HTML_Escapes{$1} ? $HTML_Escapes{$1} : "E<$1>";
                    $output .= escape($esc);
                } else {
                    warn "$fname: E<> ???\n";
                }
            } else {
                warn "$fname: what to do with $c<> ?\n";
            }
        } elsif ($c eq '>' && $#stack >= 0) {
            $output .= escape(substr($para, $opos, $cpos - $opos - 1));
            
            $c = pop(@stack);
            if ($c eq 'B') {
                $output .= ':ehp2.' if $markup;
            } elsif ($c eq 'F') {
                $output .= ':ehp2.' if $markup;
            } elsif ($c eq 'S') {
                $output .= ':ehp2.' if $markup;
            } elsif ($c eq 'I') {
                $output .= ':ehp1.' if $markup;
            } elsif ($c eq 'C') {
                $output .= ':ehp2.' if $markup;
            } elsif ($c eq 'L') {
                # end link
            } else {
                $output .= escape('>');
            }
        } else {
            $output .= escape(substr($para, $opos, $cpos - $opos));
        }
        pos($para) = $opos = $cpos;
    }
    $output .= escape(substr($para, $opos, length($para) - $opos));
    if (!$markup) { # for toc/index/...
        $output =~ s/\n\s*/ /g;
        $output = substr($output, 0, 80); # strip too long stuff
    }
    return $output;
}

sub contents {
    my $level = $_[0];
    my $no = $_[1];
    my ($i, $cl, $toplevel);

    if ($pass == 1) {
        $wingroup[$no - 1] = 2;
        return ;
    }
    
    $i = $no;

    while ($i > 0 && $headlevel[$i] >= $level) { $i--; }

    $toplevel = $headlevel[$i];

    print ":p." . out($head[$i], 0) . "\n";
    $i++;
    $cl = $toplevel;
    for (; $i <= $#head && $headlevel[$i] > $toplevel; $i++) {
        if ($headlevel[$i] > $cl) {
            warn "bad nesting: $toplevel, $headlevel[$i], $cl, $i, `$head[$i]`\n" if $headlevel[$i] != $cl + 1;
            print ":ul.\n";
            $cl++;
        } elsif ($cl > $headlevel[$i]) {
            while ($cl > $headlevel[$i]) {
                print ":eul.\n";
                $cl--;
            }
        }
        print ":li.:link reftype=hd " . winlink($i)
            . " refid=" . ($i + $ref_delta) . "."
            . out($head[$i], 1) . ":elink.\n";
    }

    while ($cl > $toplevel) {
        print ":eul.\n";
        $cl--;
    }
}

sub findref { # various heuristics to get a valid link
    my $link = $_[0];
    
    $link =~ tr/\n/ /;
    print STDERR "link: $link\n" if $debug_xref;
    if (!defined $links{$link}) { # try harder
        if (defined $links{qq|$page/"$link"|}) {
            $link = qq|$page/"$link"|;
        } elsif ($link =~ /^"/) {
            $link = "$page/$link";
        } elsif ($link =~ m|^/"|) {
            $link = "$page$link";
        } elsif ($link =~ m|^/|) {
            $link = qq|$page/"$'"|;
        } elsif ($link =~ m|^([^/ ]+)/([^"]+)$|) {
            $link = qq|$1/"$2"|;
        } else {
        }
        if ($link =~ m|^([^/ ]+)/"([^"]+)"$| && !defined $links{$link}) {
            my $a = $1;
            my $b = $2;
            my $linka;
            
            if ($b =~ /\(\)$/) { $b = $`; } # open() -> open, ...
            $linka = qq|$a/"$b"|;
            if (defined $links{$linka}) {
                $link = $linka;
            }
        }
        print STDERR "trans: $link\n" if $debug_xref;
    }
    return $link;
}

sub addref {
    my $page = $_[0];
    my $num = $_[1];
    
    $links{$page} = $num;
}

sub addsection {
    my $section = $_[0];
    my $num = $_[1];
    my $level = $_[2];

    $head[$num] = $section;
    $headlevel[$num] = $level;
}

sub escape {
    my $l = $_[0];

    $l =~ s/\&/\&amp./g;
    $l =~ s/\:/\&colon./g;
    return $l;
}

BEGIN {
    %HTML_Escapes =
        (
         'amp'	=>	'&',	#   ampersand
         'lt'	=>	'<',	#   left chevron, less-than
         'gt'	=>	'>',	#   right chevron, greater-than
         'quot'	=>	'"',	#   double quote
        );
}

sub winhead {
    my $no = $_[0];

    if ($multi_win) {
        if (defined $wingroup[$no]) {
            return "group=$wingroup[$no] x=left width=30%";
        }
    }
    return "";
}

sub winlink {
    my $no = $_[0];
    
    if ($multi_win) {
        if (defined $wingroup[$no]) {
            return "group=$wingroup[$no] vpx=left vpcx=30%";
        } else {
            return "group=1 dependent vpx=right vpcx=70%"
        }
    } 
    return "";
}

sub no_markup_len { # quick hack
    my $l = $_[0];

    $l =~ s/\:.*?\.//g;
    $l =~ s/\&.*?\./x/g;
    return length $l;
}
