[interchange] Increase SEO value of more links.

Mark Johnson interchange-cvs at icdevgroup.org
Sat Jun 22 18:29:13 UTC 2013


commit 705dd23af8d60fe54f2138a33e0c4474ecc165f2
Author: Mark Johnson <mark at endpoint.com>
Date:   Sat Jun 22 13:57:08 2013 -0400

    Increase SEO value of more links.
    
    New body tags for [more-list] can be used to "salt" more links
    with SEO-driven content. These tags can turn a link like:
    
    http://foo.com/scan/MM=[md5:x:x:x].html?mv_more_ip=1&mv_nextpage=results&pf=sql
    
    into
    
    http://foo.com/scan/leisure-bikes/traditional-bikes/page-2/MM=[md5:x:x:x].html?mv_more_ip=1&mv_nextpage=results&pf=sql
    
    While the new tags will work inside any more list, they are really
    only valuable in conjunction with the use of Permanent More, so
    those higher-value links cached by search engines actually lead
    to viable resources. See release notes and documentation on
    Permanent More if you are unfamiliar with it, or you can see my blog
    post on it at:
    
    http://blog.endpoint.com/2012/01/interchange-search-caching-with.html
    
    The new body tags are:
    
    * [more-pretty-url]
    
    This container tag includes the data of interest you'd like to plug
    into the more-list URLs to make them "pretty". The value can contain
    anything matching a-z, A-Z, 0-9, /, _, and -. If any characters other
    than those described are included, they are converted in contiguous
    blocks to a single -. Any contiguous multiples of - (after
    conversion) and / are collapsed into a single one. Finally, any -
    and / are stripped from the head and tail.
    
    What this does is allow a flexible vat into which you can throw
    meaningful data (without having to test or sanitize at all) which
    will produce a path fragment of relevant terms that is put into the
    more-list URL between the "scan/" and "/MM=..." paths within the
    overall URL.
    
    Example:
    
    [more-list]
    [more-pretty-url]men's clothing/pants/bermuda shorts[/more-pretty-url]
    ...
    [/more-list]
    
    Produces a more link with the following format:
    
    http://foo.com/scan/men-s-clothing/pants/bermuda-shorts/MM=a52a53...html?...
    
    Even though the body of a [more-list] is interpolated on reparse,
    the contents within [more-pretty-url] specifically are pre-interpolated.
    It is expected that you would have some algorithm you would use to
    determine the pretty parameters and those would be rendered via ITL
    within the [more-pretty-url] tag.
    
    * [more-incl-pageno]
    
    Additionally, to provide more context and uniqueness to the URLs,
    you can optionally request that the page number be included as the
    last path segment of the "pretty" URL. This can either be the
    default (which is "page-N"), or can be any customized string you
    define, provided that you include the sprintf %d integer
    interpolation flag so that the page number will be included.
    
    Unlike [more-pretty-url], the contents of this tag are *not*
    pre-interpolated.
    
    Expanding the above example:
    
    [more-list]
    [more-pretty-url]men's clothing/pants/bermuda shorts[/more-pretty-url]
    [more-incl-pageno][/more-incl-pageno]
    ...
    [/more-list]
    
    Produces a more link with the following format for page 4:
    
    http://foo.com/scan/men-s-clothing/pants/bermuda-shorts/page-4/MM=a52a53...html?...
    
    Or a customized page path with:
    
    [more-list]
    [more-pretty-url]men's clothing/pants/bermuda shorts[/more-pretty-url]
    [more-incl-pageno]search page %d[/more-incl-pageno]
    ...
    [/more-list]
    
    Produces a new URL for page 4:
    
    http://foo.com/scan/men-s-clothing/pants/bermuda-shorts/search-page-4/MM=a52a53...html?...

 lib/Vend/Interpolate.pm |   63 ++++++++++++++++++++++++++++++++++++----------
 1 files changed, 49 insertions(+), 14 deletions(-)
---
diff --git a/lib/Vend/Interpolate.pm b/lib/Vend/Interpolate.pm
index 668073f..6e2fe4e 100644
--- a/lib/Vend/Interpolate.pm
+++ b/lib/Vend/Interpolate.pm
@@ -3175,16 +3175,39 @@ sub find_sort {
 		$more_id,
 		$session,
 		$link_template,
+		$pretty_url,
+		$incl_pageno,
 		);
 
 sub more_link_template {
-	my ($anchor, $arg, $form_arg) = @_;
+	my ($anchor, $arg, $form_arg, $pageno) = @_;
 
-	my $url = tag_area(undef, undef, {
-	    search         => "MM=$arg",
-	    form           => $form_arg,
-	    match_security => 1,
-	});
+#::logDebug('$pretty_url is %s', $pretty_url);
+    my $this_pretty = $pretty_url || '';
+
+    if ($incl_pageno && $pageno) {
+        my $pg_tmpl = $incl_pageno eq '1' ? 'page %d' : $incl_pageno;
+        $this_pretty .= sprintf ("/$pg_tmpl", $pageno);
+    }
+
+    for ($this_pretty) {
+        s{[^\w/]+}{-}g;
+        s{/{2,}}{/}g;
+        s{^[-/]+}{}g;
+        s{[-/]+$}{}g;
+    }
+
+#::logDebug('$this_pretty after regexes: %s', $this_pretty);
+    $this_pretty &&= "$this_pretty/";
+
+    my $url = tag_area(
+        "scan/${this_pretty}MM=$arg",
+        undef,
+        {
+            form           => $form_arg,
+            match_security => 1,
+        }
+    );
 
 	my $lt = $link_template;
 	$lt =~ s/\$URL\$/$url/g;
@@ -3215,7 +3238,7 @@ sub more_link {
 	else {
 		$pa =~ s/__BORDER__/$border/e;
 		$arg = "$session:$next:$last:$chunk$perm";
-		$list .= more_link_template($pa, $arg, $form_arg) . ' ';
+		$list .= more_link_template($pa, $arg, $form_arg, $inc) . ' ';
 	}
 	return $list;
 }
@@ -3245,6 +3268,14 @@ sub tag_more_list {
 	my($first_anchor,$last_anchor);
 	my %hash;
 
+    ($pretty_url, $incl_pageno) = ();
+    if ($r =~ m{\[more[-_]pretty[-_]url\]}i) {
+#::logDebug('$r matched on more-pretty-url');
+        $r =~ s{\[more[-_]pretty[-_]url\]($All)\[/more[-_]pretty[-_]url\]}{}i
+            and $pretty_url = $q->{more_pretty_url} ||= ::interpolate_html($1);
+        $r =~ s{\[more[-_]incl[-_]pageno\]($All)\[/more[-_]incl[-_]pageno\]}{}i
+            and $incl_pageno = $q->{more_incl_pageno} ||= $1 || '1';
+    }
 
 	$session = $q->{mv_cache_key};
 	my $first = $q->{mv_first_match} || 0;
@@ -3310,7 +3341,7 @@ sub tag_more_list {
 			$arg .= ':0:';
 			$arg .= $chunk - 1;
 			$arg .= ":$chunk$perm";
-			$hash{first_link} = more_link_template($first_anchor, $arg, $form_arg);
+			$hash{first_link} = more_link_template($first_anchor, $arg, $form_arg, 1);
 		}
 
 		unless ($prev_anchor) {
@@ -3331,7 +3362,7 @@ sub tag_more_list {
 			$arg .= ':';
 			$arg .= $first - 1;
 			$arg .= ":$chunk$perm";
-			$hash{prev_link} = more_link_template($prev_anchor, $arg, $form_arg);
+			$hash{prev_link} = more_link_template($prev_anchor, $arg, $form_arg, $current && $current - 1);
 		}
 
 	}
@@ -3355,7 +3386,7 @@ sub tag_more_list {
 		$last = $next + $chunk - 1;
 		$last = $last > ($total - 1) ? $total - 1 : $last;
 		$arg = "$session:$next:$last:$chunk$perm";
-		$hash{next_link} = more_link_template($next_anchor, $arg, $form_arg);
+		$hash{next_link} = more_link_template($next_anchor, $arg, $form_arg, $current && $current + 1);
 
  		# Last link can appear when next link is valid
 		if($r =~ s:\[last[-_]anchor\]($All)\[/last[-_]anchor\]::i) {
@@ -3368,7 +3399,7 @@ sub tag_more_list {
 			$last = $total - 1;
 			my $last_beg_idx = $total - ($total % $chunk || $chunk);
 			$arg = "$session:$last_beg_idx:$last:$chunk$perm";
-			$hash{last_link} = more_link_template($last_anchor, $arg, $form_arg);
+			$hash{last_link} = more_link_template($last_anchor, $arg, $form_arg, $chunk && ceil($total / $chunk));
 		}
 	}
 	else {
@@ -4738,6 +4769,7 @@ sub region {
 
 #::logDebug("region: opt:\n" . uneval($opt) . "\npage:" . substr($page,0,100));
 
+	my $save_more;
 	if($opt->{ml} and ! defined $obj->{mv_matchlimit} ) {
 		$obj->{mv_matchlimit} = $opt->{ml};
 		$obj->{mv_more_decade} = $opt->{md};
@@ -4747,9 +4779,7 @@ sub region {
 		$obj->{mv_first_match} = $opt->{fm} if $opt->{fm};
 		$obj->{mv_search_page} = $opt->{sp} if $opt->{sp};
 		$obj->{prefix} = $opt->{prefix} if $opt->{prefix};
-		my $out = delete $obj->{mv_results};
-		Vend::Search::save_more($obj, $out);
-		$obj->{mv_results} = $out;
+		$save_more = 1;
 	}
 
 	$opt->{prefix} = $obj->{prefix} if $obj->{prefix};
@@ -4787,6 +4817,11 @@ sub region {
 	$page =~ s:\[($lprefix)\]($Some)\[/\1\]:labeled_list($opt,$2,$obj):ige
 		or $page = labeled_list($opt,$page,$obj);
 #::logDebug("past labeled_list");
+    if ($save_more) {
+        my $out = delete $obj->{mv_results};
+        Vend::Search::save_more($obj, $out);
+        $obj->{mv_results} = $out;
+    }
 
     return $page;
 }



More information about the interchange-cvs mailing list