#!/usr/bin/perl
# svn2html.pl - Convert the XML output of "svn log" to (X)HTML

# Copyright (C) 2004 Anderson Lizardo
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA

# $Id: svn2html.pl,v 1.4 2004/07/11 20:50:10 lizardo Exp $

use strict;
use warnings;

use Getopt::Long;
use Pod::Usage;
use POSIX qw(strftime);
use XML::Parser;

# Commit messages may contain non-ASCII characters, so write STDOUT as UTF-8
binmode(STDOUT, ":utf8");

# Stop after this many author/date groups have been printed (see print_log)
my $entries_limit = 10;

# Command-line settings; these defaults apply when an option is not given.
# $with_filename and $with_branchname are parsed here, but no active code
# reads them at the moment.
my ($help, $man) = (0, 0);
my $infile = "";
my ($with_filename, $with_branchname) = (0, 0);

GetOptions(
    "help"            => \$help,
    "man"             => \$man,
    "infile=s"        => \$infile,
    "with-filename"   => \$with_filename,
    "with-branchname" => \$with_branchname,
) or pod2usage(1);

# --help takes precedence over --man; both terminate the program
if ($help) {
    pod2usage(1);
}
elsif ($man) {
    pod2usage(-exitstatus => 0, -verbose => 2);
}

# State shared between the XML handlers and print_log
my $date     = "";  # Day (yyyy-mm-dd) of the log entries being collected
my $buffer   = "";  # Character data gathered since the last start tag
my $author   = "";  # Author of the log entry being processed
my %messages;       # Author => list of commit messages for $date
my $files    = "";  # Files affected by commit (only used by disabled code)
my $branches = "";  # Branches affected by commit (only used by disabled code)

# Number of author/date groups printed so far
my $entry_count = 0;

# Login name => full name conversion map
my %users = load_mapfile();

# Build the parser and register the handlers defined further down.
# Class->new is used rather than the indirect "new Class" form, which Perl
# can mis-parse when a sub called "new" happens to be in scope.
my $parser = XML::Parser->new(
    Handlers => {
        Start => \&handle_StartTag,
        End   => \&handle_EndTag,
        Char  => \&handle_Text,
    },
);

if ($infile) {
    # The trailing "1" makes the eval true on success regardless of what
    # parsefile() returns, so only a real exception reaches pod2usage().
    eval { $parser->parsefile($infile); 1 } or pod2usage("$0: $@");
}
else {
    # No --infile given: read the XML document from standard input
    $parser->parse(\*STDIN);
}

# Start-tag handler: discard the text collected so far, so that $buffer only
# ever holds character data seen after the most recent start tag.
sub handle_StartTag {
    $buffer = q{};
}

# End-tag handler: act on the text collected in $buffer for the element that
# has just been closed.
#   author - remember the author, using the full name from %users if known
#   date   - flush the collected messages when the day changes
#   path   - ignored (the filename/branch code is commented out)
#   msg    - queue the commit message under the current author
#   log    - end of the document: flush whatever is still queued
sub handle_EndTag {
    my (undef, $element) = @_;

    if ($element eq "author") {
        # Fall back to the raw login when the map has no usable full name
        $author = $users{$buffer} || $buffer;
    }
    elsif ($element eq "date") {
        # Only the yyyy-mm-dd part of the timestamp is kept; anything that
        # does not look like such a timestamp is ignored.
        if (my ($day) = $buffer =~ /^(\d{4}-\d{2}-\d{2})T/) {
            # Flush buffer if date has changed
            if ($date and $day ne $date) {
                print_log();
            }
            $date = $day;
        }
    }
    elsif ($element eq "path") {
        #my @path = split('/', $buffer);
        # Remove the first (empty) element from @path
        #shift @path;
        #my $branch = shift @path;
        #$branch = shift @path if ($branch eq "branches" or $branch eq "tags");
        #$branches .= $branch . ", " unless ($branches =~ /(^|\s+)$branch,/);
        #$files .= File::Spec->catdir(join('/', @path)) . ", ";
    }
    elsif ($element eq "msg") {
        # Remove ASCII "bullets" ("* like this") from commit messages
        $buffer =~ s/^\s*\*\s+//;
        #if ($with_filename) {
        #    $files =~ s/, $/: /;
        #    $buffer = $files . $buffer;
        #    $files = "";
        #}
        #if ($with_branchname) {
        #    $branches =~ s/, $/: /;
        #    $buffer = $branches . $buffer;
        #    $branches = "";
        #}
        chomp $buffer;

        # Prepend, so messages are stored in the reverse of arrival order
        unshift @{ $messages{$author} }, $buffer;
    }
    elsif ($element eq "log") {
        print_log();
    }
}

# Character-data handler: append the text to $buffer, with the characters
# "&", "<" and ">" replaced by their entities.
sub handle_Text {
    my (undef, $chars) = @_;

    # Quote characters (" and ') are intentionally not encoded
    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
    );

    # One pass is enough: the "&" of an inserted entity is never re-scanned
    $chars =~ s/([&<>])/$entity_for{$1}/g;

    # Add current text to the buffer
    $buffer .= $chars;
}

# Reformat an ISO 8601 date.
#   $iso_date - string containing a date written as yyyy-mm-dd
#   $format   - strftime(3) format string for the result
# Returns the formatted date, or undef when no yyyy-mm-dd date is found.
sub isodate2any {
    my ($iso_date, $format) = @_;

    my @ymd = $iso_date =~ /(\d{4})-(\d{2})-(\d{2})/;
    return undef unless @ymd;

    my ($year, $month, $day) = @ymd;

    # strftime() takes a zero-based month and a year counted from 1900
    return strftime($format, 0, 0, 0, $day, $month - 1, $year - 1900);
}

# Print the messages collected for $date as one HTML list per author (authors
# in sorted order), then empty %messages. Ends the program with exit status 0
# as soon as $entries_limit author/date groups have already been printed.
sub print_log {
    for my $name (sort keys %messages) {
        exit 0 if $entry_count++ == $entries_limit;

        my $heading = $name . " - " . isodate2any($date, '%Y/%m/%d');
        my $items   = join "",
            map { "\t\t\t<li>" . $_ . "</li>\n" } @{ $messages{$name} };

        print "<ul>\n\t<li>\n\t\t<h4>" . $heading . "</h4>\n"
            . "\t\t<ul>\n"
            . $items
            . "\t\t</ul>\n"
            . "\t</li>\n</ul>\n\n";
    }
    %messages = ();
}

# Build the login => full name map used to show real names instead of
# login names.
#
# Parameters:
#   $map_file - optional path of a colon-separated, passwd-style file;
#               defaults to /etc/passwd.
# Returns:
#   A hash (as a key/value list) mapping each login to the first
#   comma-separated component of its fifth field. Logins without a usable
#   full name are left out, so a lookup for them is simply false.
# Dies:
#   When the file cannot be opened.
sub load_mapfile {
    my ($map_file) = @_;
    $map_file = '/etc/passwd' unless defined $map_file;

    my %map;

    # Three-argument open with a lexical handle: the file name can never be
    # taken for an open mode, and the handle cannot clash with a global one.
    open(my $passwd, '<', $map_file)
        or die "Could not open $map_file\: $!";

    while (my $line = <$passwd>) {
        chomp $line;

        # Field 0 is the login, field 4 holds the full name and extras
        my ($login, $gecos) = (split /:/, $line)[0, 4];

        # Skip blank or truncated lines instead of warning about undef
        next unless defined $login and length $login;
        next unless defined $gecos and length $gecos;

        my ($full_name) = split /,/, $gecos;
        next unless defined $full_name and length $full_name;

        $map{$login} = $full_name;
    }
    close $passwd;

    return %map;
}

__END__

=head1 NAME

svn2html.pl - convert the XML output of "svn log" to (X)HTML

=head1 SYNOPSIS

svn2html.pl [--help|--man] [--with-filename] [--with-branchname] [--infile xml_file]

    Options:
        --infile          Parse XML from a file
        --with-branchname Prepend branch names to commit messages
        --with-filename   Prepend filenames to commit messages
        --help            Show brief help message
        --man             Full documentation

=head1 DESCRIPTION

B<svn2html.pl> converts the XML code produced by "svn log --xml" to
HTML or XHTML code.

=head1 OPTIONS

=over

=item B<--infile xml_file>

Specify which XML file to parse. This file must be the output of "svn log --xml".
By default, B<svn2html.pl> reads XML code from standard input.

=item B<--with-branchname>

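This option is meant to prepend branch names to each commit message. It is
currently accepted but has no effect, because the code implementing it is
disabled.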

=item B<--with-filename>

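This option is meant to prepend filenames to each commit message. It is
currently accepted but has no effect, because the code implementing it is
disabled.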

=item B<--help>

Prints a brief help message and exits.

=item B<--man>

Prints the manual page and exits.

=back

=head1 AUTHOR

Copyright (C) 2004 Anderson Lizardo <lizardo@linuxfromscratch.org>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

=cut

