[interchange-cvs] interchange - kwalsh modified 3 files

interchange-core@icdevgroup.org interchange-core@icdevgroup.org
Wed Oct 30 18:41:06 2002


User:      kwalsh
Date:      2002-10-30 23:40:29 GMT
Modified:  lib/Vend Session.pm Server.pm Config.pm
Log:
	* Added a RobotHost directive to identify robots by hostname.
	  This is in addition to the existing RobotUA and RobotIP
	  identification lists.

	* RobotHost and RobotIP are handled by the new list_wildcard_full
	  routine.  This routine does not perform a substring match, so
	  either the full string must be specified, or something along
	  the lines of *.domain.com must be used instead.

	* The RobotUA handler still performs a substring match using
	  the list_wildcard routine.

	* Both list_wildcard and list_wildcard_full now generate a
	  case-insensitive regex.

	* A new 'spider' key has been added to the session, which may
	  be accessed using [data session spider], [if session spider]
	  and $Session->{spider} etc.  Please treat this facility with
	  care, as some search engines take a dim view of so-called
	  "search engineering."

Revision  Changes    Path
2.8       +3 -2      interchange/lib/Vend/Session.pm


rev 2.8, prev_rev 2.7
Index: Session.pm
===================================================================
RCS file: /var/cvs/interchange/lib/Vend/Session.pm,v
retrieving revision 2.7
retrieving revision 2.8
diff -u -r2.7 -r2.8
--- Session.pm	16 Sep 2002 23:06:31 -0000	2.7
+++ Session.pm	30 Oct 2002 23:40:29 -0000	2.8
@@ -1,6 +1,6 @@
 # Vend::Session - Interchange session routines
 #
-# $Id: Session.pm,v 2.7 2002/09/16 23:06:31 mheins Exp $
+# $Id: Session.pm,v 2.8 2002/10/30 23:40:29 kwalsh Exp $
 # 
 # Copyright (C) 1996-2002 Red Hat, Inc. <interchange@redhat.com>
 #
@@ -26,7 +26,7 @@
 require Exporter;
 
 use vars qw($VERSION);
-$VERSION = substr(q$Revision: 2.7 $, 10);
+$VERSION = substr(q$Revision: 2.8 $, 10);
 
 @ISA = qw(Exporter);
 
@@ -450,6 +450,7 @@
 		'arg'		=> $Vend::Argument,
 		'browser'	=> $CGI::useragent,
 		'referer'	=> $CGI::referer,
+		'spider'	=> $CGI::values{mv_tmp_session},
 		'scratch'	=> { %{$Vend::Cfg->{ScratchDefault}} },
 		'values'	=> { %{$Vend::Cfg->{ValuesDefault}} },
 		'carts'		=> {main => []},



2.15      +10 -3     interchange/lib/Vend/Server.pm


rev 2.15, prev_rev 2.14
Index: Server.pm
===================================================================
RCS file: /var/cvs/interchange/lib/Vend/Server.pm,v
retrieving revision 2.14
retrieving revision 2.15
diff -u -r2.14 -r2.15
--- Server.pm	16 Sep 2002 23:06:31 -0000	2.14
+++ Server.pm	30 Oct 2002 23:40:29 -0000	2.15
@@ -1,6 +1,6 @@
 # Vend::Server - Listen for Interchange CGI requests as a background server
 #
-# $Id: Server.pm,v 2.14 2002/09/16 23:06:31 mheins Exp $
+# $Id: Server.pm,v 2.15 2002/10/30 23:40:29 kwalsh Exp $
 #
 # Copyright (C) 1996-2002 Red Hat, Inc. <interchange@redhat.com>
 #
@@ -25,7 +25,7 @@
 package Vend::Server;
 
 use vars qw($VERSION);
-$VERSION = substr(q$Revision: 2.14 $, 10);
+$VERSION = substr(q$Revision: 2.15 $, 10);
 
 use POSIX qw(setsid strftime);
 use Vend::Util;
@@ -156,8 +156,11 @@
 }
 
 sub map_misc_cgi {
-	$CGI::user = $CGI::remote_user;
+	if (!$CGI::remote_host && $CGI::remote_addr){
+		$CGI::remote_host = gethostbyaddr(Socket::inet_aton($CGI::remote_addr),Socket::AF_INET);
+	}
 	$CGI::host = $CGI::remote_host || $CGI::remote_addr;
+	$CGI::user = $CGI::remote_user;
 
 	$CGI::script_path = $CGI::script_name;
 	$CGI::script_name = $CGI::server_host . $CGI::script_path
@@ -196,6 +199,10 @@
 #::logDebug("Check robot UA=$Global::RobotUA IP=$Global::RobotIP");
 	if ($Global::RobotUA and $CGI::useragent =~ $Global::RobotUA) {
 #::logDebug("It is a robot by UA!");
+		$CGI::values{mv_tmp_session} = 1;
+	}
+	elsif ($Global::RobotHost and $CGI::remote_host =~ $Global::RobotHost) {
+#::logDebug("It is a robot by host!");
 		$CGI::values{mv_tmp_session} = 1;
 	}
 	elsif ($Global::RobotIP and $CGI::remote_addr =~ $Global::RobotIP) {



2.76      +30 -23    interchange/lib/Vend/Config.pm


rev 2.76, prev_rev 2.75
Index: Config.pm
===================================================================
RCS file: /var/cvs/interchange/lib/Vend/Config.pm,v
retrieving revision 2.75
retrieving revision 2.76
diff -u -r2.75 -r2.76
--- Config.pm	27 Oct 2002 03:53:59 -0000	2.75
+++ Config.pm	30 Oct 2002 23:40:29 -0000	2.76
@@ -1,6 +1,6 @@
 # Vend::Config - Configure Interchange
 #
-# $Id: Config.pm,v 2.75 2002/10/27 03:53:59 mheins Exp $
+# $Id: Config.pm,v 2.76 2002/10/30 23:40:29 kwalsh Exp $
 #
 # Copyright (C) 1996-2002 Red Hat, Inc. <interchange@redhat.com>
 #
@@ -45,7 +45,7 @@
 use Vend::Util;
 use Vend::Data;
 
-$VERSION = substr(q$Revision: 2.75 $, 10);
+$VERSION = substr(q$Revision: 2.76 $, 10);
 
 my %CDname;
 
@@ -289,8 +289,9 @@
 	['TemplateDir',      'root_dir_array', 	 ''],
 	['DomainTail',		 'yesno',            'Yes'],
 	['AcrossLocks',		 'yesno',            'No'],
-	['RobotIP',			 'list_wildcard',    ''],
 	['RobotUA',			 'list_wildcard',    ''],
+	['RobotIP',			 'list_wildcard_full',    ''],
+	['RobotHost',			 'list_wildcard_full',    ''],
 	['TolerateGet',		 'yesno',            'No'],
 	['PIDcheck',		 'integer',          '0'],
 	['LockoutCommand',    undef,             ''],
@@ -1507,6 +1508,26 @@
 					);
 }
 
+sub get_wildcard_list {
+	my($var, $value) = @_;
+
+	$value =~ s/^\s+//;
+	$value =~ s/\s+$//;
+	return '' if ! $value;
+
+	if($value !~ /\|/) {
+		$value =~ s/([\\\+\|\[\]\(\){}])/\\$1/g;
+		$value =~ s/\./\\./g;
+		$value =~ s/\*/.*/g;
+		$value =~ s/\?/./g;
+		my @items = grep /\S/, split /\s*,\s*/, $value;
+		s/\s+/\\s+/g for (@items);
+		$value = join '|', @items;
+	}
+	$value = parse_regex($var, $value);
+	return $value;
+}
+
 # Set up an ActionMap or FormAction
 sub parse_action {
 	my ($var, $value, $mapped) = @_;
@@ -2406,27 +2427,13 @@
 }
 
 sub parse_list_wildcard {
-	my($var, $value) = @_;
-
-	$value =~ s/^\s+//;
-	$value =~ s/\s+$//;
-	return '' if ! $value;
+	my $value = get_wildcard_list(@_);
+	return qr/$value/i;
+}
 
-	if($value !~ /\|/) {
-		my @items = split /\s*,\s*/, $value;
-		my $iplist = $value =~ /^[\d.,\s]+$/;
-		for(@items) {
-			next unless $_;
-			s/\./\\./g;
-			s/\*/.*/g;
-			s/\?/./g;
-			s/\s+/\\s+/g;
-			s/^/^/ if $iplist;
-		}
-		$value = join '|', @items;
-	}
-	$value = parse_regex($var, $value);
-	return qr/$value/;
+sub parse_list_wildcard_full {
+	my $value = '^(' . get_wildcard_list(@_) . ')$';
+	return qr/$value/i;
 }
 
 # Make a dos-ish regex into a Perl regex, check for errors