[interchange-cvs] interchange - heins modified lib/Vend/Swish.pm

interchange-core@icdevgroup.org interchange-core@icdevgroup.org
Thu Oct 17 00:04:01 2002


User:      heins
Date:      2002-10-17 04:03:14 GMT
Added:     lib/Vend Swish.pm
Log:
* Add Swish search module.

* To use, you must add to interchange.cfg:

	Require module Vend::Swish
	AddDirective Swish hash
	Variable swish Vend::Swish

  The search is called with st=swish (or mv_searchtype=swish).

* The fields to return are configurable, and default to:

	rf=code score url title filesize mod_date
	fn=code score url title filesize mod_date

  These correspond to:

			code		swishreccount
			score		swishrank
			url			swishdocpath
			title		swishtitle
			filesize	swishdocsize
			mod_date	swishlastmodified

TODO: Docs.

Revision  Changes    Path
1.1                  interchange/lib/Vend/Swish.pm


rev 1.1, prev_rev 1.0
Index: Swish.pm
===================================================================
# Vend::Swish - Search indexes with Swish-e
#
# $Id: Swish.pm,v 1.1 2002/10/17 04:03:13 mheins Exp $
#
# Adapted from Vend::Glimpse
#
# Copyright (C) 2002 Mike Heins <mikeh@perusion.net>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free
# Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA  02111-1307  USA.

package Vend::Swish;
require Vend::Search;
# Inherit the generic search methods from Vend::Search.
@ISA = qw(Vend::Search);

# Take the version from the CVS Revision keyword by skipping the
# 10-character "Revision: " prefix.
$VERSION = substr(q$Revision: 1.1 $, 10);
# strict only applies to the code that follows it, so the undeclared
# package globals above (@ISA, $VERSION) still compile.
use strict;

# Run a search and hand back what Vend::Scan::perform_search returns.
#
#   $s   - search object; its mv_list_only flag is switched on
#   $opt - options passed through to perform_search unchanged
#
# Unlike hash() and list(), mv_return_reference is left untouched.
sub array {
	my ($s, $opt) = @_;
	$s->{mv_list_only} = 1; # makes perform_search only return results array
	return Vend::Scan::perform_search($opt, undef, $s);
}

# Run a search with mv_return_reference set to 'HASH'.
#
#   $s   - search object; mv_return_reference and mv_list_only are set on it
#   $opt - options passed through to perform_search unchanged
#
# Returns whatever Vend::Scan::perform_search returns.
sub hash {
	my ($s, $opt) = @_;
	$s->{mv_return_reference} = 'HASH';
	$s->{mv_list_only} = 1; # makes perform_search only return results array
	return Vend::Scan::perform_search($opt, undef, $s);
}

# Run a search with mv_return_reference set to 'LIST'.
#
#   $s   - search object; mv_return_reference and mv_list_only are set on it
#   $opt - options passed through to perform_search unchanged
#
# Returns whatever Vend::Scan::perform_search returns.
sub list {
	my ($s, $opt) = @_;
	$s->{mv_return_reference} = 'LIST';
	$s->{mv_list_only} = 1; # makes perform_search only return results array
	return Vend::Scan::perform_search($opt, undef, $s);
}

# Baseline settings that init() copies into every search object.
my %Default = (
		matches                 => 0,
		mv_head_skip            => 0,
		mv_index_delim          => "\t",
		mv_record_delim         => "\n",
		mv_matchlimit           => 50,
		mv_max_matches          => 2000,
		mv_min_string           => 4,
);


# Fill a search object with its starting state.
#
#   $s       - hash-based search object to populate
#   $options - hash ref of overrides; applied last, so they win
#
# Values are applied in this order: %Default, then settings read from
# $Vend::Cfg->{Swish} (configfile, index, command), then $options.
# Returns nothing.
sub init {
	my ($s, $options) = @_;

#::logDebug("initting Swish search, Swish=" . Vend::Util::uneval($Vend::Cfg->{Swish}));
	$Vend::Cfg->{Swish} ||= {};
	@{$s}{keys %Default} = (values %Default);
	# Despite the key name this holds the Swish config file; search()
	# passes it to the swish command with -c.
	$s->{mv_base_directory}     = $Vend::Cfg->{Swish}{configfile} || undef;
	$s->{mv_begin_string}       = [];
	$s->{mv_all_chars}	        = [1];
	$s->{mv_case}               = [];
	$s->{mv_column_op}          = [];
	$s->{mv_negate}             = [];
	$s->{mv_numeric}            = [];
	$s->{mv_orsearch}           = [];
	$s->{mv_searchspec}	        = [];
	$s->{mv_search_group}       = [];
	$s->{mv_search_field}       = [];
	$s->{mv_search_file}        = [];
	# The configured index, if any, is the default file to search.
	push @{$s->{mv_search_file}}, $Vend::Cfg->{Swish}{index}
		if $Vend::Cfg->{Swish}{index};
	$s->{mv_sort_option}        = [];
	$s->{mv_substring_match}    = [];
	$s->{mv_field_names}      = [qw/code score url title filesize mod_date/];
	$s->{mv_return_fields}    = [qw/code score url title filesize mod_date/];
	$s->{swish_cmd} = $Vend::Cfg->{Swish}{command} || '/usr/local/bin/swish-e';
#::logDebug("initting Swish search, swish command=$s->{swish_cmd}");

	# Caller-supplied options override everything set above.
	for(keys %$options) {
		$s->{$_} = $options->{$_};
	}

	return;
}

# Constructor.
#
#   $class   - class to bless the new object into
#   %options - initial settings, handed to init() as a hash ref
#
# Builds a Vend::Search object, reblesses it into $class, runs init() on
# it and returns it.
sub new {
	my ($class, %options) = @_;
	# Arrow syntax: "new Vend::Search" is ambiguous inside a package that
	# defines its own new().
	my $s = Vend::Search->new;
	bless $s, $class;
	$s->init(\%options);
	return $s;
}

# Run a Swish-e query for this search object and store the results.
#
#   $s       - Vend::Swish search object (init() sets up its fields)
#   %options - extra settings, copied over the object's fields before the
#              search runs
#
# The swish command is started with an output format (-x) built from
# mv_field_names, plus -c/-f/-m/-w switches taken from the object, and its
# output is read one record at a time.
#
# Returns the search object with {matches} and {mv_results} filled in.
# mv_results is an array ref of rows by default, a delimited string when
# mv_return_reference is 'LIST', and a hash ref keyed on the first column
# for any other true mv_return_reference.  Returns undef when {matches} is
# already -1, and the result of search_error() on failure.
sub search {

	my($s,%options) = @_;

	my(@out);
	my($limit_sub,$return_sub,$delayed_return);
	my($key,$val);
	my(@specs);
	my(@pats);

	# Search field name => Swish-e property name.
	my %fmap = qw/
					code	swishreccount
					score	swishrank
					url		swishdocpath
					title	swishtitle
					filesize	swishdocsize
					mod_date	swishlastmodified
				/;
	while (($key,$val) = each %options) {
		$s->{$key} = $val;
	}

#::logDebug("gsearch: self=" . ::Vend::Util::uneval_it({%$s}));
	$s->{mv_return_delim} = $s->{mv_index_delim}
		unless defined $s->{mv_return_delim};

	return $s->search_error("Search with swish, no swish configured.")
		if ! $s->{swish_cmd};

	@specs = @{$s->{mv_searchspec}};

	@pats = $s->spec_check(@specs);

	my @f;

	for(@{$s->{mv_field_names}}) {
		my $name = $fmap{$_} || $_;
		push @f, "<$name>";
	}

	# The raw swish output is split on mv_index_delim further down, so it
	# has to be produced with that same delimiter.  mv_return_delim only
	# shapes the final LIST result.
	my $fmt_string = join $s->{mv_index_delim}, @f;

	$fmt_string .= $s->{mv_record_delim} eq "\n" ? '\n' : $s->{mv_record_delim};

	return undef if $s->{matches} == -1;

	# Build the swish command as an argument list so that no shell is
	# involved.  The configured command is split on whitespace so that a
	# command setting with embedded switches keeps working.
	my @cmd = split ' ', $s->{swish_cmd};
	push @cmd, '-x', $fmt_string;
	push @cmd, '-c', $s->{mv_base_directory}
			if $s->{mv_base_directory};

	# Index files are passed exactly as configured.
	if(@{$s->{mv_search_file} || []} ) {
		push @cmd, '-f', @{$s->{mv_search_file}};
	}

	push @cmd, '-m', $s->{mv_max_matches} if $s->{mv_max_matches};

	local($/) = $s->{mv_record_delim} || "\n";

	$s->save_specs();

	my $spec = join ' ', @pats;

	$spec =~ s/[^-\w()"\s]+//g
		and $CGI::values{debug}
		and ::logError("Removed unsafe characters from search string");

	if(length($spec) < $s->{mv_min_string}) {
		my $msg = ::errmsg(
					"Swish search string less than minimum %s characters: %s",
					$s->{mv_min_string},
					$spec,
				);
		return $s->search_error($msg);
	}

	# The whole query travels as a single argument; quotes and parentheses
	# reach swish untouched instead of being interpreted by a shell.
	push @cmd, '-w', $spec;

	# Flattened copy, used for log and error messages only.
	my $cmd = join ' ', @cmd;
#::logDebug("Swish command '$cmd'");

	my $search;
	unless (open($search, '-|', @cmd)) {
		my $err = "Couldn't fork swish search '$cmd': $!";
		::logError($err);
		return $s->search_error($err);
	}
	#$s->adjust_delimiter($search) if $s->{mv_delimiter_auto};

#::logDebug("search after getting fields: self=" . ::uneval({%$s}));
	my $prospect;

	eval {
		($limit_sub, $prospect) = $s->get_limit(sub { 1 }, 1);
	};

	$@  and  return $s->search_error("Limit subroutine creation: $@");

	# get_return() is still called, but its sub is replaced below: every
	# record is returned as an array ref of its fields.
	eval {($return_sub, $delayed_return) = $s->get_return(undef, 1)};

	$@  and  return $s->search_error("Return subroutine creation: $@");

	# NOTE(review): records are not chomped, so the last field of each row
	# keeps the record delimiter -- confirm whether callers rely on that.
	$return_sub = sub { return [ split /\Q$s->{mv_index_delim}\E/, shift(@_) ] };

	my @laundry = (qw/mv_search_field mv_range_look mv_return_fields/);
	$s->hash_fields(
				[ @{$s->{mv_field_names}} ],
				@laundry,
	);

	while(<$search>) {
#::logDebug("swish line: $_");
		next if /^#/;
		last if $_ eq ".\n";
		# NOTE(review): the limit sub's return value is ignored, so it
		# never filters a record -- confirm that this is intended.
		$limit_sub->($_) if $limit_sub;
		push @out, $return_sub->($_);
	}
	# The exit status is deliberately not checked here; errors that swish
	# reports in its output are picked up just below.
	close $search;

	if(scalar(@out) == 1 and $out[0][0] =~ s/^err\w*\W+//)  {
		$s->{matches} = -1;
		return $s->search_error($out[0][0]);
	}

	$s->{matches} = scalar(@out);

#::logDebug("gsearch before delayed return: self=" . ::Vend::Util::uneval_it({%$s}));
	if($s->{mv_sort_field} and  @{$s->{mv_sort_field}}) {
		$s->hash_fields($s->{mv_field_names}, qw/mv_sort_field/);
		@out = $s->sort_search_return(\@out);
	}
#::logDebug("after delayed return: self=" . ::Vend::Util::uneval_it({%$s}));

	if($s->{mv_unique}) {
		my %seen;
		@out = grep ! $seen{$_->[0]}++, @out;
		$s->{matches} = scalar(@out);
	}

	if ($s->{matches} > $s->{mv_matchlimit} and $s->{mv_matchlimit} > 0) {
		$s->save_more(\@out)
			or ::logError("Error saving matches: $!");
		if ($s->{mv_first_match}) {
			splice(@out,0,$s->{mv_first_match});
			$s->{mv_next_pointer} = $s->{mv_first_match} + $s->{mv_matchlimit};
			$s->{mv_next_pointer} = 0
				if $s->{mv_next_pointer} > $s->{matches};
		}
		# Only ever shorten the page; a short final page must not be
		# padded out with undef rows.
		$#out = $s->{mv_matchlimit} - 1
			if @out > $s->{mv_matchlimit};
	}

	if(! $s->{mv_return_reference}) {
		$s->{mv_results} = \@out;
#::logDebug("returning search: " . Vend::Util::uneval($s));
		return $s;
	}
	elsif($s->{mv_return_reference} eq 'LIST') {
		@out = map { join $s->{mv_return_delim}, @$_ } @out;
		$s->{mv_results} = join $s->{mv_record_delim}, @out;
	}
	else {
		my @names = @{$s->{mv_field_names}};
		$names[0] eq '0' and $names[0] = 'code';
		my %hash;
		# Rows are array refs here, so map their columns onto the field
		# names directly.
		for my $row (@out) {
			my %record;
			@record{@names} = @$row;
			$hash{$row->[0]} = \%record;
		}
		$s->{mv_results} = \%hash;
	}
#::logDebug("returning search: " . Vend::Util::uneval($s));
	return $s;
}

# Unfortunate hack needed for Safe searches: each Vend::Search method used
# by this class is aliased into this package's own symbol table.
# NOTE(review): presumably inherited method lookup is not usable in that
# setting -- confirm before removing any of these.
*escape             = \&Vend::Search::escape;
*spec_check         = \&Vend::Search::spec_check;
*get_scalar         = \&Vend::Search::get_scalar;
*more_matches       = \&Vend::Search::more_matches;
*get_return         = \&Vend::Search::get_return;
*map_ops            = \&Vend::Search::map_ops;
*get_limit          = \&Vend::Search::get_limit;
*saved_params       = \&Vend::Search::saved_params;
*range_check        = \&Vend::Search::range_check;
*create_search_and  = \&Vend::Search::create_search_and;
*create_search_or   = \&Vend::Search::create_search_or;
*save_context       = \&Vend::Search::save_context;
*dump_options       = \&Vend::Search::dump_options;
*save_more          = \&Vend::Search::save_more;
*sort_search_return = \&Vend::Search::sort_search_return;
*hash_fields        = \&Vend::Search::hash_fields;
*save_specs         = \&Vend::Search::save_specs;
*restore_specs      = \&Vend::Search::restore_specs;
*splice_specs       = \&Vend::Search::splice_specs;
*search_error       = \&Vend::Search::search_error;

1;
__END__