#!/usr/bin/perl

use strict;
use warnings;

use WWW::Mechanize;
use Getopt::Long;
use Pod::Usage;
use OpenGuides::RDF::Reader;
use XML::RSS;
use Data::Dumper;
use OpenGuides;
use OpenGuides::Config;

# Command-line handling.
#
# --help and --version are informational and are honoured on their own;
# --site and --config are only required once we know that an actual mirror
# run has been requested.
our $VERSION = 0.02;
my $help         = 0;    # counted: 1 = option summary, 2 = full man page
my $list_version = 0;
my $days         = 0;    # 0 means "fetch every page listed in the index"
my ($site, $config_file);

# Unknown or malformed options: show the brief usage message.
GetOptions(
    'site=s'   => \$site,
    'config=s' => \$config_file,
    'days=i'   => \$days,
    'help+'    => \$help,
    'version!' => \$list_version,
) or pod2usage( -verbose => 0 );

# The number of --help flags selects how much of the POD is shown.
pod2usage( -verbose => $help ) if $help;

if ($list_version) {
    print STDERR "$0 version $VERSION\n\n";
    exit 0;
}

# A mirror run needs both the source site and the local wiki config.
pod2usage( -verbose => 0 ) unless $site && $config_file;

=head1 NAME

mirror.pl - Replicate an OpenGuides site

=head1 SYNOPSIS

mirror.pl --site http://from.site.url/ --config /path/to/wiki.conf [--days 1]

=head1 DESCRIPTION

This is a script to mirror the contents from another OpenGuides website.
It can be run from a cron job to update periodically.

To load the wiki initially, run the script without the --days option. After
that, run it periodically with the --days option to keep the local site in
sync with the source site.

=head1 OPTIONS

=over 4

=item C<--site | -s>

Specify the guide website to mirror from.

=item C<--config | -c>

Path to the config file for the wiki on the localhost.

=item C<--days | -d>

Number of days back to look at in the RSS feed. Omit this option (or pass 0)
to work in "hoover" mode, where every page listed in the source site's index
is fetched.

=item C<--help | -h>

Show this list of options.

=item C<--help --help | -h -h>

Display man page.

=item C<--version>

Show the mirror script's version number.

=back

=head1 HISTORY

0.01 18-Oct-2005  Initial version

0.02 19-Oct-2005  Exclude updates for pages that haven't changed

=cut

# Main driver: build the list of pages to mirror, then copy each page's
# metadata and raw content from the remote guide into the local wiki.
my $agent = WWW::Mechanize->new();

my $config = OpenGuides::Config->new( file => $config_file );
my $guide  = OpenGuides->new( config => $config );
my $wiki   = $guide->wiki;

# With --days only recently changed pages are fetched; otherwise every
# page listed in the remote index.
my @pagelist = $days
    ? get_recent_changes($agent, $site, $days)
    : get_all_pages($agent, $site);

# Unbuffered output, so progress is visible while each page is processed.
$| = 1;

PAGE:
for my $page (@pagelist) {
    chomp $page;
    print $page, ":";

    # A failure on one page must not abort the whole run: report it and
    # move on. The eval's completion value (not $@ alone) signals success.
    my %meta;
    my $got_meta = eval {
        %meta = get_page_metadata($agent, $site, $page);
        1;
    };
    if (!$got_meta) {
        print "Failed to parse metadata\n";
        next PAGE;
    }

    my $text;
    my $got_text = eval {
        $text = get_page_content($agent, $site, $page);
        1;
    };
    if (!$got_text) {
        print "Failed to fetch content\n";
        next PAGE;
    }

    populate_local_wiki($wiki, $page, $text, \%meta);
}

# Fetch the plain-text page index of the remote guide.
#
# Arguments:
#   $ua  - user agent providing get(), success(), status() and content()
#          (a WWW::Mechanize object in this script)
#   $url - URL of the remote guide's script
#
# Returns the non-blank lines of the index response, one entry per line.
# Dies if the request was not successful, so that the body of an error
# page is never mistaken for a list of page names.
sub get_all_pages {
    my ($ua, $url) = @_;

    $ua->get("$url?action=index;format=plain");
    die "Failed to fetch page index from $url: HTTP " . $ua->status . "\n"
        unless $ua->success;

    # Accept LF and CRLF line endings, and skip blank lines so that no
    # empty page name is handed to the caller.
    return grep { /\S/ } split /\r?\n/, $ua->content;
}

# Fetch the remote guide's RSS feed of recent changes and return the
# titles of the changed pages.
#
# Arguments:
#   $ua   - user agent providing get(), success(), status() and content()
#   $url  - URL of the remote guide's script
#   $days - number of days of changes to request from the feed
#
# Returns the item titles in the reverse of feed order.
# NOTE(review): presumably the feed lists newest changes first, so this
# yields oldest-first processing - confirm against the remote site.
# Dies if the request was not successful.
sub get_recent_changes {
    my ($ua, $url, $days) = @_;

    $ua->get("$url?action=rss;days=$days");
    die "Failed to fetch recent changes from $url: HTTP " . $ua->status . "\n"
        unless $ua->success;

    my $rss = XML::RSS->new;
    $rss->parse($ua->content);

    # Ignore items without a usable title; they cannot name a page.
    my @titles = grep { defined && length }
                 map  { $_->{title} } @{ $rss->{items} || [] };

    return reverse @titles;
}

# Fetch the RDF description of one page from the remote guide and parse it.
#
# Arguments:
#   $ua   - user agent providing get(), success(), status() and content()
#   $url  - URL of the remote guide's script
#   $page - page identifier, used verbatim as the "id" parameter
#
# Returns whatever parse_rdf() returns for the fetched document (the
# caller in this script stores it in a hash).
# Dies if the request was not successful, so that an error page body is
# never handed to the RDF parser.
sub get_page_metadata {
    my ($ua, $url, $page) = @_;

    $ua->get("$url?id=$page;format=rdf");
    die "Failed to fetch metadata of '$page' from $url: HTTP "
        . $ua->status . "\n"
        unless $ua->success;

    my $rdf = $ua->content;

    return parse_rdf($rdf);
}

# Fetch the raw text of one page from the remote guide.
#
# Arguments:
#   $ua   - user agent providing get(), success(), status() and content()
#   $url  - URL of the remote guide's script
#   $page - page identifier, used verbatim as the "id" parameter
#
# Returns the response body.
# Dies if the request was not successful, so that the body of an error
# page is never returned as page content.
sub get_page_content {
    my ($ua, $url, $page) = @_;

    $ua->get("$url?id=$page;format=raw");
    die "Failed to fetch content of '$page' from $url: HTTP "
        . $ua->status . "\n"
        unless $ua->success;

    return $ua->content;
}

# Write one mirrored page into the local wiki, unless the local copy
# already records the same remote version.
#
# Arguments:
#   $wiki     - wiki object providing formatter(), retrieve_node() and
#               write_node()
#   $page     - page identifier as used in the remote URL
#   $content  - raw page text to store
#   $metadata - hash reference of page metadata; its "version" entry is
#               compared with the version stored alongside the local node
#
# Prints the outcome ("Unchanged", "Creating... " or "Updating... ",
# followed by "Done" or "Failed") to the currently selected output handle.
sub populate_local_wiki {
    my ($wiki, $page, $content, $metadata) = @_;

    my $node     = $wiki->formatter->node_param_to_node_name($page);
    my %old_data = $wiki->retrieve_node($node);

    if ($old_data{version}) {
        my $local_version  = $old_data{metadata}{version}[0];
        my $remote_version = $metadata->{version};

        # Only call the page unchanged when both versions are known and
        # equal. A missing version on either side must not compare equal
        # (undef == undef is true numerically), so the page is rewritten.
        if (   defined $local_version
            && defined $remote_version
            && $local_version == $remote_version) {
            print "Unchanged\n";
            return;
        }
        print "Updating... ";
    }
    else {
        print "Creating... ";
    }

    # The old checksum is passed along with the new content; a false
    # return from write_node is reported as "Failed".
    my $written = $wiki->write_node(
        $node, $content, $old_data{checksum}, $metadata
    );

    print $written ? "Done\n" : "Failed\n";
}

