Mirror: mirror.pl

File mirror.pl, 3.3 KB (added by Dominic Hargreaves, 16 years ago)
Line 
#!/usr/bin/perl

use strict;
use warnings;

use WWW::Mechanize;
use Getopt::Long;
use Pod::Usage;
use OpenGuides::RDF::Reader;
use XML::RSS;
use Data::Dumper;
use OpenGuides;
use OpenGuides::Config;

our $VERSION = 0.02;

# Command-line state, filled in by GetOptions below.
my $help         = 0;    # counts --help flags: 1 = option list, 2 = man page
my $list_version = 0;
my $days         = 0;    # 0 selects "hoover" mode (mirror every page)
my ( $site, $config_file );

# An unparseable command line gets the brief usage message.
GetOptions(
    'site=s'   => \$site,
    'config=s' => \$config_file,
    'days=i'   => \$days,
    'help+'    => \$help,
    'version!' => \$list_version,
) or pod2usage( -verbose => 0 );

# Handle --help and --version BEFORE insisting on --site/--config, so that
# "mirror.pl --help" and "mirror.pl --version" work on their own.
pod2usage( -verbose => $help ) if $help;

if ($list_version) {
    print STDERR "$0 version $VERSION\n\n";
    exit 0;
}

# Both the source site and the local wiki config are mandatory for a real run.
pod2usage( -verbose => 0 ) unless $site && $config_file;

=head1 NAME

mirror.pl - Replicate an OpenGuides site

=head1 SYNOPSIS

mirror.pl --site http://from.site.url/ --config /path/to/wiki.conf [--days 1]

=head1 DESCRIPTION

This is a script to mirror the contents from another OpenGuides website.
It can be run from a cron job to update periodically.

To initially load the wiki, run the script without the --days option. Then,
the script can be run periodically with the --days option, to keep the site
in line.

=head1 OPTIONS

=over 4

=item C<--site | -s>

Specify the guide website to mirror from.

=item C<--config | -c>

Path to the config file for the wiki on the localhost.

=item C<--days | -d>

Number of days back to look at in the RSS feed. Omit this option to work
in "hoover" mode.

=item C<--help | -h>

Show this list of options.

=item C<--help --help | -h -h>

Display man page.

=item C<--version>

Show the mirror script's version number.

=back

=head1 HISTORY

0.01 18-Oct-2005  Initial version

0.02 19-Oct-2005  Exclude updates for pages that haven't changed

=cut

# HTTP client used for every request to the source site.
my $agent = WWW::Mechanize->new();

# Open the local wiki described by the --config file.
my $config = OpenGuides::Config->new( file => $config_file );
my $guide  = OpenGuides->new( config => $config );
my $wiki   = $guide->wiki;

# With --days only recently changed pages are considered; otherwise every
# page listed by the source site's index is mirrored.
my @pagelist = $days
    ? get_recent_changes( $agent, $site, $days )
    : get_all_pages( $agent, $site );

# Unbuffer STDOUT so the "name:" prefix appears before the per-page result.
$| = 1;

PAGE:
for my $page (@pagelist) {

    # A named loop variable is used instead of $_ so that nothing called
    # below can clobber the current page name.
    next PAGE unless defined $page;
    chomp $page;
    next PAGE if $page eq '';

    print $page, ":";

    # Test the eval's own result rather than $@, which is a global that
    # can be reset between the failure and the check.
    my %meta;
    my $got_meta
        = eval { %meta = get_page_metadata( $agent, $site, $page ); 1 };
    if ( !$got_meta ) {
        print "Failed to parse metadata\n";
        next PAGE;
    }

    #    print Dumper \%meta;

    # A failed content fetch skips this page instead of aborting the whole
    # run, mirroring how metadata failures are treated.
    my $text;
    my $got_text
        = eval { $text = get_page_content( $agent, $site, $page ); 1 };
    if ( !$got_text ) {
        print "Failed to fetch content\n";
        next PAGE;
    }

    #    print $text;

    populate_local_wiki( $wiki, $page, $text, \%meta );
}

# get_all_pages($ua, $url)
#
# Requests "$url?action=index;format=plain" through $ua and returns the
# response body as a list of lines, one entry per non-empty line.
#
#   $ua  - user agent object providing get(), success(), status(), content()
#   $url - base URL of the source site's script
#
# Dies if the request was not successful, so that an HTTP error page is
# never mistaken for a list of page names.
sub get_all_pages {
    my ( $ua, $url ) = @_;

    my $target = "$url?action=index;format=plain";
    $ua->get($target);
    die "Failed to fetch $target: HTTP status " . $ua->status
        unless $ua->success;

    # Accept both LF and CRLF line endings and drop blank lines, which can
    # never be valid page names.
    return grep { length } split /\r?\n/, $ua->content;
}

# get_recent_changes($ua, $url, $days)
#
# Requests "$url?action=rss;days=$days" through $ua, parses the response
# with XML::RSS and returns the titles of the feed items in reverse feed
# order.
#
#   $ua   - user agent object providing get(), success(), status(), content()
#   $url  - base URL of the source site's script
#   $days - number of days passed through to the feed request
#
# Dies if the request was not successful, so an HTTP error page is never
# handed to the RSS parser.
sub get_recent_changes {
    my ( $ua, $url, $days ) = @_;

    my $target = "$url?action=rss;days=$days";
    $ua->get($target);
    die "Failed to fetch $target: HTTP status " . $ua->status
        unless $ua->success;

    my $rss = XML::RSS->new;
    $rss->parse( $ua->content );

    # NOTE(review): presumably the feed lists newest changes first, so the
    # reversal processes the oldest change first - confirm against the feed.
    return reverse map { $_->{title} } @{ $rss->{items} };
}

# get_page_metadata($ua, $url, $page)
#
# Requests "$url?id=$page;format=rdf" through $ua and returns whatever
# parse_rdf() produces for the response body (the caller stores it in a
# hash).
#
#   $ua   - user agent object providing get(), success(), status(), content()
#   $url  - base URL of the source site's script
#   $page - page identifier as obtained from the page list
#
# Dies if the request was not successful; the caller runs this inside an
# eval and skips the page on failure.
sub get_page_metadata {
    my ( $ua, $url, $page ) = @_;

    # NOTE(review): $page is interpolated unescaped; names containing
    # characters such as '&', ';' or '#' may need URI-escaping - confirm
    # whether the page list is already in URL-parameter form.
    my $target = "$url?id=$page;format=rdf";
    $ua->get($target);
    die "Failed to fetch $target: HTTP status " . $ua->status
        unless $ua->success;

    return parse_rdf( $ua->content );
}

# get_page_content($ua, $url, $page)
#
# Requests "$url?id=$page;format=raw" through $ua and returns the response
# body.
#
#   $ua   - user agent object providing get(), success(), status(), content()
#   $url  - base URL of the source site's script
#   $page - page identifier as obtained from the page list
#
# Dies if the request was not successful, so that an HTTP error page can
# never be written into the local wiki as page content.
sub get_page_content {
    my ( $ua, $url, $page ) = @_;

    # NOTE(review): $page is interpolated unescaped; names containing
    # characters such as '&', ';' or '#' may need URI-escaping - confirm
    # whether the page list is already in URL-parameter form.
    my $target = "$url?id=$page;format=raw";
    $ua->get($target);
    die "Failed to fetch $target: HTTP status " . $ua->status
        unless $ua->success;

    return $ua->content;
}

# populate_local_wiki($wiki, $page, $content, $metadata)
#
# Creates or updates one node in the local wiki and prints the outcome
# ("Creating... ", "Updating... " or "Unchanged", followed by "Done" or
# "Failed" when a write was attempted).
#
#   $wiki     - wiki object providing formatter(), retrieve_node(), write_node()
#   $page     - page identifier; converted to a node name by the formatter
#   $content  - page text to store
#   $metadata - hashref of metadata; its "version" entry is compared with
#               the version recorded in the existing node's metadata
#
# Returns nothing useful.
sub populate_local_wiki {
    my ( $wiki, $page, $content, $metadata ) = @_;

    my $node     = $wiki->formatter->node_param_to_node_name($page);
    my %old_data = $wiki->retrieve_node($node);

    if ( $old_data{version} ) {

        # The stored metadata value is normally an array ref; read it
        # without autovivifying and tolerate a plain scalar as well.
        my $stored
            = $old_data{metadata} ? $old_data{metadata}{version} : undef;
        my $old_version = ref($stored) eq 'ARRAY' ? $stored->[0] : $stored;
        my $new_version = $metadata->{version};

        # Only report "Unchanged" when both versions are known and equal.
        # A missing version on either side proves nothing, and comparing
        # undef with == would both warn and treat undef as equal to undef.
        if (   defined $old_version
            && defined $new_version
            && $old_version == $new_version )
        {
            print "Unchanged\n";
            return;
        }

        print "Updating... ";
    }
    else {
        print "Creating... ";
    }

    # The old checksum is passed so write_node can detect a conflicting edit.
    my $written
        = $wiki->write_node( $node, $content, $old_data{checksum}, $metadata );

    print $written ? "Done\n" : "Failed\n";
    return;
}
