[interchange-cvs] interchange - heins modified lib/Vend/Swish.pm
interchange-core@icdevgroup.org
interchange-core@icdevgroup.org
Thu Oct 17 00:04:01 2002
User: heins
Date: 2002-10-17 04:03:14 GMT
Added: lib/Vend Swish.pm
Log:
* Add Swish search module.
* To use, you must add to interchange.cfg:
Require module Vend::Swish
AddDirective Swish hash
Variable swish Vend::Swish
The search is called with st=swish (or mv_searchtype=swish).
* The fields to return are configurable, and default to:
rf=code score title url mod_date filesize
fn=code score title url mod_date filesize
These correspond to:
code swishreccount
score swishrank
url swishdocpath
title swishtitle
filesize swishdocsize
mod_date swishlastmodified
TODO: Docs.
Revision Changes Path
1.1 interchange/lib/Vend/Swish.pm
rev 1.1, prev_rev 1.0
Index: Swish.pm
===================================================================
# Vend::Swish - Search indexes with Swish-e
#
# $Id: Swish.pm,v 1.1 2002/10/17 04:03:13 mheins Exp $
#
# Adapted from Vend::Glimpse
#
# Copyright (C) 2002 Mike Heins <mikeh@perusion.net>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free
# Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307 USA.
package Vend::Swish;

# Vend::Search is the base class; it is loaded at run time.
require Vend::Search;

# These package globals are assigned *before* `use strict` on purpose:
# strict is lexically scoped from the point where it appears, so the
# undeclared @ISA and $VERSION above it are still legal.
@ISA = qw(Vend::Search);

# The CVS keyword expands to "Revision: N.N "; offset 10 skips the
# "Revision: " prefix and leaves the version number.
$VERSION = substr(q$Revision: 1.1 $, 10);

use strict;
# array - run the search described by $opt and return only the results.
#
#   $s    search object (hash ref); mv_list_only is set on it
#   $opt  options passed straight through to Vend::Scan::perform_search
#
# Returns whatever Vend::Scan::perform_search($opt, undef, $s) returns.
sub array {
    my ($s, $opt) = @_;
    $s->{mv_list_only} = 1; # makes perform_search only return results array
    return Vend::Scan::perform_search($opt, undef, $s);
}
# hash - like array(), but asks for the results as a hash reference.
#
#   $s    search object (hash ref); mv_return_reference is set to 'HASH'
#         and mv_list_only to 1
#   $opt  options passed straight through to Vend::Scan::perform_search
#
# Returns whatever Vend::Scan::perform_search($opt, undef, $s) returns.
sub hash {
    my ($s, $opt) = @_;
    $s->{mv_return_reference} = 'HASH';
    $s->{mv_list_only} = 1; # makes perform_search only return results array
    return Vend::Scan::perform_search($opt, undef, $s);
}
# list - like array(), but asks for the results as one delimited string.
#
#   $s    search object (hash ref); mv_return_reference is set to 'LIST'
#         and mv_list_only to 1
#   $opt  options passed straight through to Vend::Scan::perform_search
#
# Returns whatever Vend::Scan::perform_search($opt, undef, $s) returns.
sub list {
    my ($s, $opt) = @_;
    $s->{mv_return_reference} = 'LIST';
    $s->{mv_list_only} = 1; # makes perform_search only return results array
    return Vend::Scan::perform_search($opt, undef, $s);
}
# Baseline settings copied onto every search object by init().
my %Default = (
    matches         => 0,
    mv_head_skip    => 0,
    mv_index_delim  => "\t",
    mv_record_delim => "\n",
    mv_matchlimit   => 50,
    mv_max_matches  => 2000,
    mv_min_string   => 4,
);

# init - reset a search object to the Swish defaults, then apply overrides.
#
#   $s        search object (hash ref) to populate
#   $options  hash ref of settings; every key is copied onto $s last, so it
#             overrides both %Default and the values taken from the catalog
#             Swish configuration
#
# Reads $Vend::Cfg->{Swish}{configfile}, {index} and {command}; creates an
# empty $Vend::Cfg->{Swish} hash if none exists. Returns nothing.
sub init {
    my ($s, $options) = @_;

#::logDebug("initting Swish search, Swish=" . Vend::Util::uneval($Vend::Cfg->{Swish}));
    $Vend::Cfg->{Swish} ||= {};
    my $cfg = $Vend::Cfg->{Swish};

    @{$s}{keys %Default} = values %Default;

    # The swish-e config file is kept in mv_base_directory; search() hands
    # it to swish-e with -c.
    $s->{mv_base_directory} = $cfg->{configfile} || undef;

    $s->{mv_all_chars} = [1];

    # All remaining per-search option lists start out empty.
    for my $key (qw/
        mv_begin_string
        mv_case
        mv_column_op
        mv_negate
        mv_numeric
        mv_orsearch
        mv_searchspec
        mv_search_group
        mv_search_field
        mv_search_file
        mv_sort_option
        mv_substring_match
    /) {
        $s->{$key} = [];
    }

    # A configured index file becomes the default search file.
    push @{$s->{mv_search_file}}, $cfg->{index}
        if $cfg->{index};

    $s->{mv_field_names}   = [qw/code score url title filesize mod_date/];
    $s->{mv_return_fields} = [qw/code score url title filesize mod_date/];

    $s->{swish_cmd} = $cfg->{command} || '/usr/local/bin/swish-e';
#::logDebug("initting Swish search, swish command=$s->{swish_cmd}");

    for (keys %$options) {
        $s->{$_} = $options->{$_};
    }
    return;
}
# new - construct a Swish search object.
#
#   $class    class to bless the object into
#   %options  settings forwarded to init(), where they override the defaults
#
# The object is created by Vend::Search->new and then re-blessed into $class.
sub new {
    my ($class, %options) = @_;
    # Direct method call instead of the ambiguous indirect-object form
    # "new Vend::Search".
    my $s = Vend::Search->new;
    bless $s, $class;
    $s->init(\%options);
    return $s;
}
# search - run swish-e for the current search specs and collect the rows.
#
#   $s        search object
#   %options  settings copied onto $s before anything else happens
#
# swish-e is started with an -x format string built from mv_field_names
# (mapped to swish property names through %fmap), so each output record is
# one row of fields separated by mv_index_delim.
#
# Returns:
#   * the result of $s->search_error(...) when no swish command is set, the
#     search string is shorter than mv_min_string, a helper sub cannot be
#     built, swish-e cannot be started, or swish-e reports an error row;
#   * undef when $s->{matches} is already -1;
#   * otherwise $s, with $s->{matches} set and $s->{mv_results} holding
#       - an array ref of rows (array refs) when mv_return_reference is unset,
#       - one string joined with mv_return_delim / mv_record_delim for 'LIST',
#       - a hash ref keyed by the first column for any other value.
sub search {
    my ($s, %options) = @_;

    my @out;
    my ($limit_sub, $return_sub, $delayed_return);

    # Interchange field name => swish-e result property name
    my %fmap = qw/
        code        swishreccount
        score       swishrank
        url         swishdocpath
        title       swishtitle
        filesize    swishdocsize
        mod_date    swishlastmodified
    /;

    while (my ($key, $val) = each %options) {
        $s->{$key} = $val;
    }

#::logDebug("gsearch: self=" . ::Vend::Util::uneval_it({%$s}));
    $s->{mv_return_delim} = $s->{mv_index_delim}
        unless defined $s->{mv_return_delim};

    return $s->search_error("Search with swish, no swish configured.")
        if ! $s->{swish_cmd};

    my @pats = $s->spec_check(@{ $s->{mv_searchspec} });

    # The swish output is parsed by splitting on mv_index_delim below, so
    # the format string has to be built with the same delimiter.
    # mv_return_delim is only used when formatting 'LIST' results.
    my @props = map { '<' . ($fmap{$_} || $_) . '>' } @{ $s->{mv_field_names} };
    my $fmt_string = join $s->{mv_index_delim}, @props;
    # A newline is passed as the two characters \n for swish-e to expand.
    $fmt_string .= $s->{mv_record_delim} eq "\n" ? '\n' : $s->{mv_record_delim};

    return undef if $s->{matches} == -1;

    # Build the swish-e argument vector. The command is executed without a
    # shell (list-form pipe open below), so nothing in the user's search
    # string can be interpreted as shell syntax.
    # Whitespace splitting keeps configurations such as "swish-e -v 0" or
    # several index names in one setting working as they did under the shell.
    my @cmd = split ' ', $s->{swish_cmd};
    push @cmd, '-x', $fmt_string;
    push @cmd, '-c', $s->{mv_base_directory}
        if $s->{mv_base_directory};
    my @index_files = map { split ' ', $_ } @{ $s->{mv_search_file} || [] };
    push @cmd, '-f', @index_files
        if @index_files;
    push @cmd, '-m', $s->{mv_max_matches}
        if $s->{mv_max_matches};

    local $/ = $s->{mv_record_delim} || "\n";

    $s->save_specs();

    my $spec = join ' ', @pats;
    $spec =~ s/[^-\w()"\s]+//g
        and $CGI::values{debug}
        and ::logError("Removed unsafe characters from search string");

    if (length($spec) < $s->{mv_min_string}) {
        my $msg = ::errmsg(
            "Swish search string less than minimum %s characters: %s",
            $s->{mv_min_string},
            $spec,
        );
        return $s->search_error($msg);
    }

    # One argument: quotes and parentheses reach the swish-e query parser
    # intact instead of being consumed by a shell.
    push @cmd, '-w', $spec;

    my $prospect;
    eval {
        ($limit_sub, $prospect) = $s->get_limit(sub { 1 }, 1);
    };
    $@ and return $s->search_error("Limit subroutine creation: $@");

    eval { ($return_sub, $delayed_return) = $s->get_return(undef, 1) };
    $@ and return $s->search_error("Return subroutine creation: $@");

    # swish-e already emits exactly the requested fields, so the generic
    # return sub is replaced by a plain split. \Q keeps a delimiter such as
    # "|" literal; the -1 limit keeps trailing empty fields so every row has
    # the same number of columns.
    my $split_re = qr/\Q$s->{mv_index_delim}\E/;
    $return_sub = sub { return [ split $split_re, shift(@_), -1 ] };

    my @names = @{ $s->{mv_field_names} };
    $names[0] =~ s/^\s+// if @names;
    my @laundry = (qw/mv_search_field mv_range_look mv_return_fields/);
    $s->hash_fields(\@names, @laundry);

    my $cmd = join ' ', @cmd;
#::logDebug("Swish command '$cmd'");
    my $search_fh;
    # List-form pipe open (Perl 5.8+) execs swish-e directly, no shell.
    unless (open $search_fh, '-|', @cmd) {
        my $msg = "Couldn't fork swish search '$cmd': $!";
        ::logError($msg);
        return $s->search_error($msg);
    }

    while (my $line = <$search_fh>) {
#::logDebug("swish line: $line");
        next if $line =~ /^#/;      # swish-e header/comment lines
        last if $line eq ".\n";     # swish-e end-of-results marker
        # NOTE(review): the limit sub's return value is discarded, exactly
        # as before, so it never filters rows -- confirm whether
        # "next unless" was intended.
        $limit_sub->($line) if $limit_sub;
        chomp $line;                # drop the record delimiter ($/)
        push @out, $return_sub->($line);
    }
    # A non-zero swish-e exit status makes close() return false; problems
    # are detected from the output below, so the status is ignored here.
    close $search_fh;

    # A single row starting with "err" is swish-e reporting a failure.
    if (scalar(@out) == 1
        and defined $out[0][0]
        and $out[0][0] =~ s/^err\w*\W+//)
    {
        $s->{matches} = -1;
        return $s->search_error($out[0][0]);
    }

    $s->{matches} = scalar(@out);

#::logDebug("gsearch before delayed return: self=" . ::Vend::Util::uneval_it({%$s}));

    if ($s->{mv_sort_field} and @{$s->{mv_sort_field}}) {
        $s->hash_fields($s->{mv_field_names}, qw/mv_sort_field/);
        @out = $s->sort_search_return(\@out);
    }

#::logDebug("after delayed return: self=" . ::Vend::Util::uneval_it({%$s}));

    if ($s->{mv_unique}) {
        my %seen;
        @out = grep ! $seen{$_->[0]}++, @out;
        $s->{matches} = scalar(@out);
    }

    if ($s->{matches} > $s->{mv_matchlimit} and $s->{mv_matchlimit} > 0) {
        $s->save_more(\@out)
            or ::logError("Error saving matches: $!");
        if ($s->{mv_first_match}) {
            splice(@out, 0, $s->{mv_first_match});
            $s->{mv_next_pointer} = $s->{mv_first_match} + $s->{mv_matchlimit};
            $s->{mv_next_pointer} = 0
                if $s->{mv_next_pointer} > $s->{matches};
        }
        # Only truncate: assigning to $#out unconditionally would pad a
        # short final page with undefined rows.
        $#out = $s->{mv_matchlimit} - 1
            if @out > $s->{mv_matchlimit};
    }

    if (! $s->{mv_return_reference}) {
        $s->{mv_results} = \@out;
#::logDebug("returning search: " . Vend::Util::uneval($s));
        return $s;
    }
    elsif ($s->{mv_return_reference} eq 'LIST') {
        $s->{mv_results} = join $s->{mv_record_delim},
            map { join $s->{mv_return_delim}, @$_ } @out;
    }
    else {
        my @col_names = @{ $s->{mv_field_names} };
        $col_names[0] eq '0' and $col_names[0] = 'code';
        my %hash;
        # Rows are already array refs, so they are used directly rather
        # than being split as if they were delimited strings.
        for my $row (@out) {
            my %record;
            @record{@col_names} = @$row;
            $hash{ $row->[0] } = \%record;
        }
        $s->{mv_results} = \%hash;
    }
#::logDebug("returning search: " . Vend::Util::uneval($s));
    return $s;
}
# Unfortunate hack needed for Safe searches: alias the Vend::Search
# subroutines into this package's symbol table so they can be called here
# by name as well as through @ISA.
*escape             = \&Vend::Search::escape;
*spec_check         = \&Vend::Search::spec_check;
*get_scalar         = \&Vend::Search::get_scalar;
*more_matches       = \&Vend::Search::more_matches;
*get_return         = \&Vend::Search::get_return;
*map_ops            = \&Vend::Search::map_ops;
*get_limit          = \&Vend::Search::get_limit;
*saved_params       = \&Vend::Search::saved_params;
*range_check        = \&Vend::Search::range_check;
*create_search_and  = \&Vend::Search::create_search_and;
*create_search_or   = \&Vend::Search::create_search_or;
*save_context       = \&Vend::Search::save_context;
*dump_options       = \&Vend::Search::dump_options;
*save_more          = \&Vend::Search::save_more;
*sort_search_return = \&Vend::Search::sort_search_return;
*hash_fields        = \&Vend::Search::hash_fields;
*save_specs         = \&Vend::Search::save_specs;
*restore_specs      = \&Vend::Search::restore_specs;
*splice_specs       = \&Vend::Search::splice_specs;
*search_error       = \&Vend::Search::search_error;
1;
__END__