Changeset 469


Ignore:
Timestamp:
Sep 15, 2004, 4:41:33 PM (17 years ago)
Author:
kake
Message:

Added option of using Plucene for searching.

Location:
trunk
Files:
1 added
10 edited

Legend:

Unmodified
Added
Removed
  • trunk/Build.PL

    r468 r469  
    3737{ question => "What URL does the install directory map to?",
    3838  variable => "script_url",
     39  },
     40{ question => "Do you want to use Plucene for searching? (recommended, but see Changes file before saying yes to this)",
     41  variable => "use_plucene",
     42  default  => "n",
     43  type     => "y_n"
    3944  },
    4045{ question => "What directory can I use to store indexes in for searching? "
  • trunk/Changes

    r468 r469  
       1      0.39
            1 0.39    15 September 2004
       2    2         Split commit_node out into OpenGuides.pm in preparation for spam filter
            3         Added option of using Plucene for searching.  If you want to do this
            4           (and it is recommended over the default of Search::InvertedIndex)
            5           you will need to do two things:
            6             - either delete your old indexes (they're just files in the index
            7               directory) or use a different index directory
            8             - reindex your entire wiki (see reindex.pl in the examples/
            9               directory of this distribution)
       3   10
       4   11 0.38    26 July 2004
  • trunk/MANIFEST

    r468 r469  
    99TROUBLESHOOTING
    1010examples/london.css
     11examples/reindex.pl
    1112examples/vegan-oxford.css
    1213lib/OpenGuides/Build.pm
  • trunk/lib/OpenGuides/SuperSearch.pm

    r468 r469  
    148148
    149149        # Redirect to a single result only if the title is a good enough match.
    150         my %fuzzies = $self->{wiki}->fuzzy_title_match($self->{search_string});
    151         if ($numres == 1 && !$self->{return_tt_vars} && scalar keys %fuzzies) {
    152             my $node = $results[0]{name};
    153             my $output = CGI::redirect( $self->{wikimain} . "?"
    154                                         . CGI::escape($node) );
    155             return $output if $self->{return_output};
    156             print $output;
    157             exit;
    158         } else {
    159             # We browse through the results a page at a time.
    160 
    161             # Figure out which results we're going to be showing on this
    162             # page, and what the first one for the next page will be.
    163             my $startpos = $vars{next} || 0;
    164             $tt_vars{first_num} = $numres ? $startpos + 1 : 0;
    165             $tt_vars{last_num}  = $numres > $startpos + 20
    166                                     ? $startpos + 20
    167                                     : $numres;
    168             $tt_vars{total_num} = $numres;
    169             if ( $numres > $startpos + 20 ) {
    170                 $tt_vars{next_page_startpos} = $startpos + 20;
    171             }
    172 
    173             # Sort the results - by distance if we're searching on that
    174             # or by score otherwise.
    175             if ( $vars{distance_in_metres} ) {
    176                 @results = sort { $a->{distance} <=> $b->{distance} } @results;
    177             } else {
    178                 @results = sort { $b->{score} <=> $a->{score} } @results;
    179             }
    180 
    181             # Now snip out just the ones for this page.  The -1 is
    182             # because arrays index from 0 and people from 1.
    183             my $from = $tt_vars{first_num} ? $tt_vars{first_num} - 1 : 0;
    184             my $to   = $tt_vars{last_num} - 1; # kludge to empty arr for no res
    185             @results = @results[ $from .. $to ];
    186 
    187             $tt_vars{results} = \@results;
     150        # (Don't try a fuzzy search on a blank search string - Plucene chokes.)
     151        if ( $self->{search_string} ) {
     152            my %fuzzies =
     153                      $self->{wiki}->fuzzy_title_match($self->{search_string});
     154            if ( $numres == 1
     155                 && !$self->{return_tt_vars} && scalar keys %fuzzies) {
     156                my $node = $results[0]{name};
     157                my $output = CGI::redirect( $self->{wikimain} . "?"
     158                                            . CGI::escape($node) );
     159                return $output if $self->{return_output};
     160                print $output;
     161                exit;
     162            }
     163        }
     164
     165        # We browse through the results a page at a time.
     166
     167        # Figure out which results we're going to be showing on this
     168        # page, and what the first one for the next page will be.
     169        my $startpos = $vars{next} || 0;
     170        $tt_vars{first_num} = $numres ? $startpos + 1 : 0;
     171        $tt_vars{last_num}  = $numres > $startpos + 20
     172                                                       ? $startpos + 20
     173                                                       : $numres;
     174        $tt_vars{total_num} = $numres;
     175        if ( $numres > $startpos + 20 ) {
     176            $tt_vars{next_page_startpos} = $startpos + 20;
    188177        }
     178
     179        # Sort the results - by distance if we're searching on that
     180        # or by score otherwise.
     181        if ( $vars{distance_in_metres} ) {
     182            @results = sort { $a->{distance} <=> $b->{distance} } @results;
     183        } else {
     184            @results = sort { $b->{score} <=> $a->{score} } @results;
     185        }
     186
     187        # Now snip out just the ones for this page.  The -1 is
     188        # because arrays index from 0 and people from 1.
     189        my $from = $tt_vars{first_num} ? $tt_vars{first_num} - 1 : 0;
     190        my $to   = $tt_vars{last_num} - 1; # kludge to empty arr for no results
     191        @results = @results[ $from .. $to ];
     192
     193        $tt_vars{results} = \@results;
    189194    }
    190195
  • trunk/lib/OpenGuides/Utils.pm

    r468 r469  
    33use strict;
    44use vars qw( $VERSION );
    5 $VERSION = '0.06';
     5$VERSION = '0.07';
    66
    77use Carp qw( croak );
     
    99use CGI::Wiki::Formatter::UseMod;
    1010use CGI::Wiki::Plugin::RSS::Reader;
    11 use CGI::Wiki::Search::SII;
    12 use Search::InvertedIndex::DB::DB_File_SplitHash;
    1311use URI::Escape;
    1412
     
    6058=item *
    6159
    62 indexing_directory - for the L<Search::InvertedIndex> files to go
     60indexing_directory - for the L<Search::InvertedIndex> or L<Plucene> files to go
    6361
    6462=back
     
    9492
    9593    # Make search.
    96     my $indexdb = Search::InvertedIndex::DB::DB_File_SplitHash->new(
    97         -map_name  => $config->{_}{indexing_directory},
    98         -lock_mode => "EX"
    99     );
    100     my $search  = CGI::Wiki::Search::SII->new( indexdb => $indexdb );
     94    my $search;
     95    if ( $config->{_}{use_plucene}
     96         && ( lc($config->{_}{use_plucene}) eq "y"
     97              || $config->{_}{use_plucene} == 1 )
     98       ) {
     99        require CGI::Wiki::Search::Plucene;
     100        $search = CGI::Wiki::Search::Plucene->new(
     101                                       path => $config->{_}{indexing_directory}
     102                                                 );
     103    } else {
     104        require CGI::Wiki::Search::SII;
     105        require Search::InvertedIndex::DB::DB_File_SplitHash;
     106        my $indexdb = Search::InvertedIndex::DB::DB_File_SplitHash->new(
     107            -map_name  => $config->{_}{indexing_directory},
     108            -lock_mode => "EX"
     109        );
     110        $search = CGI::Wiki::Search::SII->new( indexdb => $indexdb );
     111    }
    101112
    102113    # Make formatter.
  • trunk/t/31_supersearch.t

    r468 r469  
    1010} else {
    1111    plan tests => 17;
     12
     13    # Clear out the database from any previous runs.
     14    unlink "t/node.db";
     15    unlink <t/indexes/*>;
    1216
    1317    CGI::Wiki::Setup::SQLite::setup( { dbname => "t/node.db" } );
     
    2327                   };
    2428
     29    # Plucene is the recommended searcher now.
     30    eval { require CGI::Wiki::Search::Plucene; };
     31    unless ( $@ ) {
     32        $config->{_}{use_plucene} = 1;
     33    }
     34
    2535    my $search = OpenGuides::SuperSearch->new( config => $config );
    2636    isa_ok( $search, "OpenGuides::SuperSearch" );
    27 
    28     # Clear out the database from any previous runs.
    29     my $wiki = $search->{wiki}; # white boxiness
    30     foreach my $del_node ( $wiki->list_all_nodes ) {
    31         print "# Deleting node $del_node\n";
    32         $wiki->delete_node( $del_node ) or die "Can't delete $del_node";
    33     }
    3437
    3538    my $output = $search->run( return_output => 1 );
     
    5457
    5558    # Pop some data in and search again.
    56     $wiki = $search->{wiki}; # white boxiness
     59    my $wiki = $search->{wiki}; # white boxiness
    5760    $wiki->write_node( "Banana", "banana" );
    5861    $wiki->write_node( "Monkey", "banana brains" );
  • trunk/t/32_supersearch_simple_metadata.t

    r468 r469  
    1212      unless $have_sqlite;
    1313
     14    # Clear out the database from any previous runs.
     15    unlink "t/node.db";
     16    unlink <t/indexes/*>;
     17
    1418    CGI::Wiki::Setup::SQLite::setup( { dbname => "t/node.db" } );
    1519    my $config = Config::Tiny->new;
     
    2428                   };
    2529
     30    # Plucene is the recommended searcher now.
     31    eval { require CGI::Wiki::Search::Plucene; };
     32    unless ( $@ ) {
     33        $config->{_}{use_plucene} = 1;
     34    }
     35
    2636    my $search = OpenGuides::SuperSearch->new( config => $config );
    2737
    28     # Clear out the database from any previous runs.
     38    # Add some data.  We write it twice to avoid hitting the redirect.
    2939    my $wiki = $search->{wiki}; # white boxiness
    30     foreach my $del_node ( $wiki->list_all_nodes ) {
    31         $wiki->delete_node( $del_node ) or die "Can't delete $del_node";
    32     }
    33 
    34     # Add some data.  We write it twice to avoid hitting the redirect.
    35     $wiki = $search->{wiki}; # white boxiness
    3640    $wiki->write_node( "Calthorpe Arms", "Serves beer.", undef,
    3741                       { category => "Pubs", locale => "Holborn" } );
  • trunk/t/33_supersearch_advanced_search.t

    r468 r469  
    1212      unless $have_sqlite;
    1313
     14    # Clear out the database from any previous runs.
     15    unlink "t/node.db";
     16    unlink <t/indexes/*>;
     17
    1418    CGI::Wiki::Setup::SQLite::setup( { dbname => "t/node.db" } );
    1519    my $config = Config::Tiny->new;
     
    2428                   };
    2529
     30    # Plucene is the recommended searcher now.
     31    eval { require CGI::Wiki::Search::Plucene; };
     32    unless ( $@ ) {
     33        $config->{_}{use_plucene} = 1;
     34    }
     35
    2636    my $search = OpenGuides::SuperSearch->new( config => $config );
    2737
    28     # Clear out the database from any previous runs.
     38    # Add some data.  Write more than one pub to avoid hitting the redirect.
    2939    my $wiki = $search->{wiki}; # white boxiness
    30     foreach my $del_node ( $wiki->list_all_nodes ) {
    31         $wiki->delete_node( $del_node ) or die "Can't delete $del_node";
    32     }
    33 
    34     # Add some data.  Write more than one pub to avoid hitting the redirect.
    35     $wiki = $search->{wiki}; # white boxiness
    3640    my $ctdata = {
    3741                   os_x      => 523465,
  • trunk/t/35_supersearch_two_searches.t

    r468 r469  
    1010} else {
    1111    plan tests => 2;
     12
     13    # Clear out the database from any previous runs.
     14    unlink "t/node.db";
     15    unlink <t/indexes/*>;
    1216
    1317    CGI::Wiki::Setup::SQLite::setup( { dbname => "t/node.db" } );
     
    2327                   };
    2428
     29    # Plucene is the recommended searcher now.
     30    eval { require CGI::Wiki::Search::Plucene; };
     31    unless ( $@ ) {
     32        $config->{_}{use_plucene} = 1;
     33    }
     34
    2535    my $search = OpenGuides::SuperSearch->new( config => $config );
    2636
    27     # Clear out the database from any previous runs.
    28     my $wiki = $search->{wiki}; # white boxiness
    29     foreach my $del_node ( $wiki->list_all_nodes ) {
    30         print "# Deleting node $del_node\n";
    31         $wiki->delete_node( $del_node ) or die "Can't delete $del_node";
    32     }
    33 
    3437    # Write some data.
     38    my $wiki = $search->{wiki};
    3539    $wiki->write_node( "Wandsworth Common", "A common.", undef,
    3640                       { category => "Parks" } )
  • trunk/t/36_supersearch_order.t

    r468 r469  
    1010} else {
    1111    plan tests => 7;
     12
     13    # Clear out the database from any previous runs.
     14    unlink "t/node.db";
     15    unlink <t/indexes/*>;
    1216
    1317    CGI::Wiki::Setup::SQLite::setup( { dbname => "t/node.db" } );
     
    2327                   };
    2428
     29    # Plucene is the recommended searcher now.
     30    eval { require CGI::Wiki::Search::Plucene; };
     31    unless ( $@ ) {
     32        $config->{_}{use_plucene} = 1;
     33    }
     34
    2535    my $search = OpenGuides::SuperSearch->new( config => $config );
    2636    isa_ok( $search, "OpenGuides::SuperSearch" );
    2737
    28     # Clear out the database from any previous runs.
    29     my $wiki = $search->{wiki}; # white boxiness
    30     foreach my $del_node ( $wiki->list_all_nodes ) {
    31         print "# Deleting node $del_node\n";
    32         $wiki->delete_node( $del_node ) or die "Can't delete $del_node";
    33     }
    34 
    3538    # Write some data.
     39    my $wiki = $search->{wiki};
    3640    $wiki->write_node( "Parks", "A page about parks." )
    3741        or die "Can't write node";
Note: See TracChangeset for help on using the changeset viewer.