#!/usr/bin/perl
# vim:sw=4 sta showmatch

# $Id: man_xml,v 1.7 2001/02/25 21:46:34 stevecheng Exp $

use strict;

package manxml;

use XML::Parser::PerlSAX;
use XML::SGMLSpl;
use Getopt::Long;

##################################################
#
# Option parsing
#
##################################################

sub options_help {
    print "Usage: $0 [OPTION]... [FILE]...\n";
    print <<'end';
Make man pages from Man-XML

  --symlinks            Symlinks are used to make man pages with multiple
                        names.
  --solinks             Stub pages with .so requests to the main man page
                        are used instead.
  --no-links            Do not automatically make any man page links.
                        (Default)
  --help                Display this help and exit.
  --version             Output version information and exit.

See accompanying documentation for more details.
end
    exit 0;
}

sub options_version
{
    print <<'end';
man_xml (part of docbook2X)
$Revision: 1.7 $ $Date: 2001/02/25 21:46:34 $
<URL:http://docbook2x.sourceforge.net/>

Copyright (C) 2000-2001 Steve Cheng
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
end
    exit 0;
}

%manxml::options = (
    'symlinks' => 0,
    'solinks' => 0,
    'no-links' => 1
);
   
$SIG{__WARN__} = sub { print STDERR "$0: " . $_[0]; };
if(!GetOptions(\%manxml::options,
    'symlinks',
    'solinks',
    'no-links',
    'help', \&options_help,
    'version', \&options_version))
{
    print STDERR "Try \"$0 --help\" for more information.\n";
    exit 1;
}
$SIG{__WARN__} = undef;

if($manxml::options{'symlinks'} +
   $manxml::options{'solinks'} +
   $manxml::options{'no-links'} != 1)
{
    print STDERR "$0: Only one of --symlinks, --solinks or --no-links options is allowed!\n";
    exit 1;
}




my $sgmlspl = new XML::SGMLSpl;

##################################################
#
# Output routines
#
##################################################

sub output {
    my $text = shift;

    if($text =~ s/^\n//) {
	# Force newline if needed
	print MANPAGE "\n" unless $manxml::newline_last++;
    }
    return if $text eq '';
    
    print MANPAGE $text;

    $manxml::newline_last = ($text =~ /\n$/);
}



# FIXME
sub man_escape
{
    my $s = shift;
    for($s) {
	s/\\/\\e/g;
	s/\'/\\\&'/g;
	s/\./\\\&./g;
    }
    return $s;
}

sub man_arg_escape
{
    my $s = shift;
    $s =~ s/[ \n]/\\ /g;	# This should be enough.
    return $s;
}

# Remove leading and trailing blanks.
#
sub strip_string
{
    my $str = shift;

    $str = $1 if ($str =~ m#^\s*(\S.*)#);
    $str = $1 if ($str =~ m#^(.*\S)\s*$#);

    return $str;
}

# Generate a good file name, for given a refentrytitle and manvolnum
#
sub man_fileinfo
{
    my $title = strip_string(shift);
    my $sect = strip_string(shift);
    # FIXME What should we do with non-ASCII characters?
    $title =~ tr/ /_/;
    $sect =~ tr/ /_/;
    return "$title.$sect";
}



##################################################
#
# A clean solution to the extra-newlines problem
#
##################################################

# man_xml keeps track of what type of element (block, inline 
# or neither) it just processed, then makes line breaks for
# the next element only if necessary
#
# These functions are passed the *parent* element,
# i.e. the container element of the element possibly needing
# a break. (no pun intended :)
#

sub block_break
{
    my ($elem) = @_;
    my $lastchild = $elem->ext->{'lastchild'};

    if($lastchild eq 'block') {
	output "\n.PP\n";
    } elsif($lastchild eq 'inline' || $lastchild eq 'TP') {
	output "\n\n";
    }
}

sub inline_break
{
    my ($elem) = @_;
    my $lastchild = $elem->ext->{'lastchild'};

    if($lastchild eq 'block') {
	output "\n\n";
    } elsif($lastchild eq 'TP') {
	# In roff, TP is not actually a container,
	# so if you want to get out of it, you must
	# use .PP
	output "\n.PP\n";
    }
}

    


##################################################
#
# Man page management
#
##################################################

$sgmlspl->{start_element}->{'manpage'} = sub {
    my ($elem) = @_;

    my $filename = man_fileinfo($elem->attribute('title'),
				$elem->attribute('sect'));

    $filename =~ s#^([^./])#./$1#;	# avoid magic open
    open(MANPAGE, ">$filename" . "\0");
    $manxml::newline_last = 1;

    output ".TH ";

    # Nothing in the man macros say this has to be the same as
    # the $file and $sect.  While it is best to follow convention,
    # some stylesheets may want to uppercase/lowercase the name, 
    # so it is best leave this to them.
    output man_arg_escape($elem->attribute('h1')) . ' ';
    output man_arg_escape($elem->attribute('h2')) . ' ';
    
    output man_arg_escape($elem->attribute('h3')) . ' ';
    output man_arg_escape($elem->attribute('h4')) . ' ';
    output man_arg_escape($elem->attribute('h5')) . "\n";
};

# mandb needs a one-line format, apparently
$sgmlspl->{start_element}->{'refnameline'} = sub {
    my ($elem) = @_;
    $elem->ext->{'outputmode'} = 'strip-newline';
};

$sgmlspl->{start_element}->{'refname'} = sub {
    my ($elem) = @_;
    $elem->ext->{'outputmode'} = 'strip-newline-save';
};
$sgmlspl->{end_element}->{'refname'} = sub {
    my ($elem) = @_;
    my $refname = $elem->ext->{'outputsave'};
    push(@{$elem->parent->ext->{'refnames'}}, $refname);
};
$sgmlspl->{start_element}->{'refpurposeminus'} = sub {
    # We don't need Unicode translation in most cases...
    output ' \- ';
};

$sgmlspl->{end_element}->{'manpage'} = sub {
    my ($elem) = @_;

    close(MANPAGE);

    my $mainfilename = man_fileinfo($elem->attribute('title'),
				    $elem->attribute('sect'));

    # Make the files for the alternate names for the man page
    foreach my $refname (@{$elem->ext->{'refnames'}})
    {
	my $filename = man_fileinfo($refname, $elem->attribute('sect'));

	if($filename eq $mainfilename || $manxml::options{'no-links'}) {
	    # Same as main man page, don't make link.

	} elsif($manxml::options{'symlinks'}) {
	    symlink($mainfilename, $filename);
	
	} else {
	    my $sectnum = $1 if $mainfilename =~ /^.+\.(\d)/;
	    $filename =~ s#^([^./])#./$1#;	# avoid magic open
	    open(LINK, ">$filename" . "\0");
	    print LINK ".so man${sectnum}/$mainfilename\n";
	    close(LINK);
	}
    }
};



##################################################
#
# Headings
#
##################################################

sub section_start_handler {
    my ($elem, $sgmlspl) = @_;

    $elem->parent->ext->{'lastchild'} = '';
    output "\n." . $elem->name . ' ';

    $elem->ext->{'outputmode'} = 'strip-newline';
}

sub section_end_handler {
    output "\n";
}

foreach my $gi (qw(SH SS))
{
    $sgmlspl->{start_element}->{$gi} = \&section_start_handler;
    $sgmlspl->{end_element}->{$gi} = \&section_end_handler;
}
    

##################################################
#
# Font elements
#
##################################################

$sgmlspl->{start_element}->{'fB'} = sub {
    my ($elem, $sgmlspl) = @_;
    inline_break($elem->parent);
    
    # If the last font is also bold, don't change anything.
    # Basically this is to just get more readable man output.
    if($elem->parent->name ne 'fB') {
	
	# Don't touch newline status, since \fB.PP is legal
	# and won't introduce extra newline
	print MANPAGE '\fB';
    }
    
    $elem->parent->ext->{'lastchild'} = 'inline';
};

$sgmlspl->{end_element}->{'fB'} = sub {
    my ($elem, $sgmlspl) = @_;

    if($elem->parent->name eq 'fI') { print MANPAGE '\fI' }
    elsif($elem->parent->name eq 'fB') { }
    else { print MANPAGE '\fR' }
};

$sgmlspl->{start_element}->{'fI'} = sub {
    my ($elem, $sgmlspl) = @_;
    inline_break($elem->parent);
    
    # If the last font is also italic, don't change anything.
    # Basically this is to just get more readable man output.
    if($elem->parent->name ne 'fI') {
	
	# Don't touch newline status, since \fB.PP is legal
	# and won't introduce extra newline
	print MANPAGE '\fI';
    }
    
    $elem->parent->ext->{'lastchild'} = 'inline';
};

$sgmlspl->{end_element}->{'fI'} = sub {
    my ($elem, $sgmlspl) = @_;

    if($elem->parent->name eq 'fI') { }
    elsif($elem->parent->name eq 'fB') { print MANPAGE '\fB' }
    else { print MANPAGE '\fR' }
};




    

##################################################
#
# Paragraph
#
##################################################

$sgmlspl->{start_element}->{'para'} = sub {
    my ($elem) = @_;

    block_break($elem->parent);
    $elem->parent->ext->{'lastchild'} = 'block';
};
$sgmlspl->{end_element}->{'para'} = sub {
    output "\n";
};
    


##################################################
#
# Indented paragraph
#
##################################################

$sgmlspl->{start_element}->{'TPentry'} = sub {
    my ($elem) = @_;

    # Start a new line, but skip /NO/ lines
    my $lastchild = $elem->parent->ext->{'lastchild'};
    output "\n" if $lastchild eq 'inline' || $lastchild eq 'block';
	
    $elem->parent->ext->{'lastchild'} = 'TP';

    $elem->ext->{'lastchild'} = '';
};
$sgmlspl->{end_element}->{'TPentry'} = sub {
    output "\n";
};

$sgmlspl->{start_element}->{'TP'} = sub {
    my ($elem) = @_;
    output "\n.TP " . $elem->attribute('i') . "\n";
    $elem->ext->{'outputmode'} = 'strip-newline';
};
$sgmlspl->{end_element}->{'TP'} = sub {
    output "\n";
};

# Never seen man pages that use this though...
$sgmlspl->{start_element}->{'HP'} = sub {
    my ($elem) = @_;

    # Start a new line, but skip /NO/ lines
    my $lastchild = $elem->parent->ext->{'lastchild'};
    output "\n" if $lastchild eq 'inline' || $lastchild eq 'block';
	
    $elem->parent->ext->{'lastchild'} = 'TP';
    
    $elem->ext->{'lastchild'} = '';

    output "\n.HP " . $elem->attribute('i') . "\n";
};


 
##################################################
#
# Plain old indent
#
##################################################

$sgmlspl->{start_element}->{'RS'} = sub {
    my ($elem) = @_;

    block_break($elem->parent);
    $elem->parent->ext->{'lastchild'} = 'block';
    
    my $indent = $elem->attribute('i');
    output ".RS $indent\n";
};
$sgmlspl->{end_element}->{'RS'} = sub {
    output "\n.RE\n";
};

##################################################
#
# Verbatim
#
##################################################

$sgmlspl->{start_element}->{'verbatim'} = sub {
    my ($elem) = @_;
    
    block_break($elem->parent);
    $elem->parent->ext->{'lastchild'} = '';
    
    $elem->ext->{'outputmode'} = 'preserve';
    
    output ".nf\n";
};
$sgmlspl->{end_element}->{'verbatim'} = sub {
    output "\n.fi\n\n";
};

##################################################
#
# Spacing
#
##################################################

$sgmlspl->{start_element}->{'sp'} = sub {
    my ($elem) = @_;
    output "\n.sp " . $elem->attribute('n') . "\n";
};

##################################################
#
# Character data
#
##################################################

$sgmlspl->{characters} = sub {
    my ($s, $elem, $sgmlspl) = @_;
    
    $s = man_escape($s);
	
    # Output mode of '' does not override parent default modes
    # Since there is not much element nesting in Man-XML,
    # hopefully this search isn't too inefficient ...
    #
    my $outputmode, my $this;
    for ($this = $elem; $this; $this = $this->parent) {
	$outputmode = $this->ext->{'outputmode'} and last;
    }
    if(!defined $this) { $this = $elem }
 
    if($outputmode eq '') {
	# Collapse whitespace
        $s =~ tr/ \t/ /s;
	
	# Collapse newlines
	$s =~ tr/\n//s;

        # No spaces at beginning of lines
	$s =~ s/^ // if $manxml::newline_last;
	$s =~ s/(\n \n?)+/\n/g;
	 
	if($s =~ /[^\s]/) {
	    inline_break($elem);
	    $elem->ext->{'lastchild'} = 'inline';
	}
	
	output $s;
    }
     
    elsif($outputmode =~ /^strip-newline/) {
	# Newlines, die!
        $s =~ tr/ \t\n/ /s;

	# NOTE: Unlike texi_xml, man_xml does not need markup
	# text to be saved at all, so output saving is merged 
	# in with the element's outputmode.
	if($outputmode =~ /save$/) {
	    # No spaces at beginning of lines
	    $s =~ s/^ +// if $this->ext->{'outputsave'} eq '';
	    $this->ext->{'outputsave'} .= $s;

	    if($s ne '') {
	        inline_break($elem);
		$elem->ext->{'lastchild'} = 'inline';
		output $s;
	    }
	} else {
	    # No spaces at beginning of lines
	    $s =~ s/^ +// if $manxml::newline_last;
	    
	    if($s ne '') {
	        inline_break($elem);
		$elem->ext->{'lastchild'} = 'inline';
		output $s;
	    }
	}
    
    } elsif($outputmode eq 'preserve') {
	if($manxml::newline_last and $s =~ /^\n/) {
	    # Make another line anyway
	    output "\n\n";
	}

	inline_break($elem);
	$elem->ext->{'lastchild'} = 'inline';
			 
	output $s;
    
    } else {
	die("Unknown output mode $outputmode\n");
    }

};




##################################################
#
# Comments
#
##################################################

$sgmlspl->{comment} = sub {
    my ($s, $elem, $sgmlspl) = @_;

    $s =~ tr/\n/ /s;
   
    if($manxml::newline_last) {
	output '.\"' . $s . "\n";
    } else {
	output '\"' . $s . "\n";
    }
};


##################################################
#
# Processing instruction
#
##################################################

$sgmlspl->{processing_instruction} = sub {
    my ($target, $data, $sgmlspl) = @_;

    # Allow processing instructions to use newlines(?)
    $data =~ s/\&#xA;/\n/g;
    $data =~ s/\&#10;/\n/g;

    if($target eq 'man') {
        output $data;
    }
};




##################################################
#
# Main
#
##################################################

unshift(@ARGV, '-') unless @ARGV;
my $parser = XML::Parser::PerlSAX->new(Handler => $sgmlspl);

foreach $manxml::inputfile (@ARGV)
{
    $parser->parse(Source => { SystemId => $manxml::inputfile });
}

