[interchange] Increase SEO value of more links.
Mark Johnson
interchange-cvs at icdevgroup.org
Sat Jun 22 18:29:13 UTC 2013
commit 705dd23af8d60fe54f2138a33e0c4474ecc165f2
Author: Mark Johnson <mark at endpoint.com>
Date: Sat Jun 22 13:57:08 2013 -0400
Increase SEO value of more links.
New body tags for [more-list] can be used to "salt" more links
with SEO-driven content. These tags can turn a link like:
http://foo.com/scan/MM=[md5:x:x:x].html?mv_more_ip=1&mv_nextpage=results&pf=sql
into
http://foo.com/scan/leisure-bikes/traditional-bikes/page-2/MM=[md5:x:x:x].html?mv_more_ip=1&mv_nextpage=results&pf=sql
While the new tags will work inside any more list, they are really
only valuable in conjunction with the use of Permanent More, so
those higher-value links cached by search engines actually lead
to viable resources. See release notes and documentation on
Permanent More if you are unfamiliar with it, or you can see my blog
post on it at:
http://blog.endpoint.com/2012/01/interchange-search-caching-with.html
The new body tags are:
* [more-pretty-url]
This container tag includes the data of interest you'd like to plug
into the more-list URLs to make them "pretty". The value can contain
anything matching a-z, 0-9, /, _, and -. If any characters other
than those described are included, they are converted in contiguous
blocks to a single -. Any contiguous multiples of - (after
conversion) and / are collapsed into a single one. Finally, any -
and / are stripped from the head and tail.
What this does is allow a flexible vat into which you can throw
meaningful data (without having to test or sanitize at all) which
will produce a path fragment of relevant terms that is put into the
more-list URL between the "scan/" and "/MM=..." path segments within the
overall URL.
Example:
[more-list]
[more-pretty-url]men's clothing/pants/bermuda shorts[/more-pretty-url]
...
[/more-list]
Produces a more link with the following format:
http://foo.com/scan/men-s-clothing/pants/bermuda-shorts/MM=a52a53...html?...
Even though the body of a [more-list] is interpolated on reparse,
the contents within [more-pretty-url] specifically are pre-interpolated.
It is expected that you would have some algorithm you would use to
determine the pretty parameters and those would be rendered via ITL
within the [more-pretty-url] tag.
* [more-incl-pageno]
Additionally, to provide more context and uniqueness to the URLs,
you can optionally request that the page number be included as the
last path segment of the "pretty" URL. This can either be the
default (which is "page-N"), or can be any customized string you
define, provided that you include the sprintf %d integer
interpolation flag so that the page number will be included.
Unlike [more-pretty-url], the contents of this tag are *not*
pre-interpolated.
Expanding the above example:
[more-list]
[more-pretty-url]men's clothing/pants/bermuda shorts[/more-pretty-url]
[more-incl-pageno][/more-incl-pageno]
...
[/more-list]
Produces a more link with the following format for page 4:
http://foo.com/scan/men-s-clothing/pants/bermuda-shorts/page-4/MM=a52a53...html?...
Or a customized page path with:
[more-list]
[more-pretty-url]men's clothing/pants/bermuda shorts[/more-pretty-url]
[more-incl-pageno]search page %d[/more-incl-pageno]
...
[/more-list]
produces a new URL for page 4:
http://foo.com/scan/men-s-clothing/pants/bermuda-shorts/search-page-4/MM=a52a53...html?...
lib/Vend/Interpolate.pm | 63 ++++++++++++++++++++++++++++++++++++----------
1 files changed, 49 insertions(+), 14 deletions(-)
---
diff --git a/lib/Vend/Interpolate.pm b/lib/Vend/Interpolate.pm
index 668073f..6e2fe4e 100644
--- a/lib/Vend/Interpolate.pm
+++ b/lib/Vend/Interpolate.pm
@@ -3175,16 +3175,39 @@ sub find_sort {
$more_id,
$session,
$link_template,
+ $pretty_url,
+ $incl_pageno,
);
sub more_link_template {
- my ($anchor, $arg, $form_arg) = @_;
+ my ($anchor, $arg, $form_arg, $pageno) = @_;
- my $url = tag_area(undef, undef, {
- search => "MM=$arg",
- form => $form_arg,
- match_security => 1,
- });
+#::logDebug('$pretty_url is %s', $pretty_url);
+ my $this_pretty = $pretty_url || '';
+
+ if ($incl_pageno && $pageno) {
+ my $pg_tmpl = $incl_pageno eq '1' ? 'page %d' : $incl_pageno;
+ $this_pretty .= sprintf ("/$pg_tmpl", $pageno);
+ }
+
+ for ($this_pretty) {
+ s{[^\w/]+}{-}g;
+ s{/{2,}}{/}g;
+ s{^[-/]+}{}g;
+ s{[-/]+$}{}g;
+ }
+
+#::logDebug('$this_pretty after regexes: %s', $this_pretty);
+ $this_pretty &&= "$this_pretty/";
+
+ my $url = tag_area(
+ "scan/${this_pretty}MM=$arg",
+ undef,
+ {
+ form => $form_arg,
+ match_security => 1,
+ }
+ );
my $lt = $link_template;
$lt =~ s/\$URL\$/$url/g;
@@ -3215,7 +3238,7 @@ sub more_link {
else {
$pa =~ s/__BORDER__/$border/e;
$arg = "$session:$next:$last:$chunk$perm";
- $list .= more_link_template($pa, $arg, $form_arg) . ' ';
+ $list .= more_link_template($pa, $arg, $form_arg, $inc) . ' ';
}
return $list;
}
@@ -3245,6 +3268,14 @@ sub tag_more_list {
my($first_anchor,$last_anchor);
my %hash;
+ ($pretty_url, $incl_pageno) = ();
+ if ($r =~ m{\[more[-_]pretty[-_]url\]}i) {
+#::logDebug('$r matched on more-pretty-url');
+ $r =~ s{\[more[-_]pretty[-_]url\]($All)\[/more[-_]pretty[-_]url\]}{}i
+ and $pretty_url = $q->{more_pretty_url} ||= ::interpolate_html($1);
+ $r =~ s{\[more[-_]incl[-_]pageno\]($All)\[/more[-_]incl[-_]pageno\]}{}i
+ and $incl_pageno = $q->{more_incl_pageno} ||= $1 || '1';
+ }
$session = $q->{mv_cache_key};
my $first = $q->{mv_first_match} || 0;
@@ -3310,7 +3341,7 @@ sub tag_more_list {
$arg .= ':0:';
$arg .= $chunk - 1;
$arg .= ":$chunk$perm";
- $hash{first_link} = more_link_template($first_anchor, $arg, $form_arg);
+ $hash{first_link} = more_link_template($first_anchor, $arg, $form_arg, 1);
}
unless ($prev_anchor) {
@@ -3331,7 +3362,7 @@ sub tag_more_list {
$arg .= ':';
$arg .= $first - 1;
$arg .= ":$chunk$perm";
- $hash{prev_link} = more_link_template($prev_anchor, $arg, $form_arg);
+ $hash{prev_link} = more_link_template($prev_anchor, $arg, $form_arg, $current && $current - 1);
}
}
@@ -3355,7 +3386,7 @@ sub tag_more_list {
$last = $next + $chunk - 1;
$last = $last > ($total - 1) ? $total - 1 : $last;
$arg = "$session:$next:$last:$chunk$perm";
- $hash{next_link} = more_link_template($next_anchor, $arg, $form_arg);
+ $hash{next_link} = more_link_template($next_anchor, $arg, $form_arg, $current && $current + 1);
# Last link can appear when next link is valid
if($r =~ s:\[last[-_]anchor\]($All)\[/last[-_]anchor\]::i) {
@@ -3368,7 +3399,7 @@ sub tag_more_list {
$last = $total - 1;
my $last_beg_idx = $total - ($total % $chunk || $chunk);
$arg = "$session:$last_beg_idx:$last:$chunk$perm";
- $hash{last_link} = more_link_template($last_anchor, $arg, $form_arg);
+ $hash{last_link} = more_link_template($last_anchor, $arg, $form_arg, $chunk && ceil($total / $chunk));
}
}
else {
@@ -4738,6 +4769,7 @@ sub region {
#::logDebug("region: opt:\n" . uneval($opt) . "\npage:" . substr($page,0,100));
+ my $save_more;
if($opt->{ml} and ! defined $obj->{mv_matchlimit} ) {
$obj->{mv_matchlimit} = $opt->{ml};
$obj->{mv_more_decade} = $opt->{md};
@@ -4747,9 +4779,7 @@ sub region {
$obj->{mv_first_match} = $opt->{fm} if $opt->{fm};
$obj->{mv_search_page} = $opt->{sp} if $opt->{sp};
$obj->{prefix} = $opt->{prefix} if $opt->{prefix};
- my $out = delete $obj->{mv_results};
- Vend::Search::save_more($obj, $out);
- $obj->{mv_results} = $out;
+ $save_more = 1;
}
$opt->{prefix} = $obj->{prefix} if $obj->{prefix};
@@ -4787,6 +4817,11 @@ sub region {
$page =~ s:\[($lprefix)\]($Some)\[/\1\]:labeled_list($opt,$2,$obj):ige
or $page = labeled_list($opt,$page,$obj);
#::logDebug("past labeled_list");
+ if ($save_more) {
+ my $out = delete $obj->{mv_results};
+ Vend::Search::save_more($obj, $out);
+ $obj->{mv_results} = $out;
+ }
return $page;
}
More information about the interchange-cvs
mailing list